linux/net/unix/af_unix.c
<<
>>
Prefs
   1/*
   2 * NET4:        Implementation of BSD Unix domain sockets.
   3 *
   4 * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5 *
   6 *              This program is free software; you can redistribute it and/or
   7 *              modify it under the terms of the GNU General Public License
   8 *              as published by the Free Software Foundation; either version
   9 *              2 of the License, or (at your option) any later version.
  10 *
  11 * Fixes:
  12 *              Linus Torvalds  :       Assorted bug cures.
  13 *              Niibe Yutaka    :       async I/O support.
  14 *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15 *              Alan Cox        :       Limit size of allocated blocks.
  16 *              Alan Cox        :       Fixed the stupid socketpair bug.
  17 *              Alan Cox        :       BSD compatibility fine tuning.
  18 *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19 *              Alan Cox        :       Sorted out a proper draft version of
  20 *                                      file descriptor passing hacked up from
  21 *                                      Mike Shaver's work.
  22 *              Marty Leisner   :       Fixes to fd passing
  23 *              Nick Nevin      :       recvmsg bugfix.
  24 *              Alan Cox        :       Started proper garbage collector
  25 *              Heiko EiBfeldt  :       Missing verify_area check
  26 *              Alan Cox        :       Started POSIXisms
  27 *              Andreas Schwab  :       Replace inode by dentry for proper
  28 *                                      reference counting
  29 *              Kirk Petersen   :       Made this a module
  30 *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31 *                                      Lots of bug fixes.
  32 *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
  33 *                                      by above two patches.
  34 *           Andrea Arcangeli   :       If possible we block in connect(2)
  35 *                                      if the max backlog of the listen socket
  36 *                                      has been reached. This won't break
  37 *                                      old apps and it will avoid a huge number
  38 *                                      of socks hashed (this is for unix_gc()
  39 *                                      performance reasons).
  40 *                                      Security fix that limits the max
  41 *                                      number of socks to 2*max_files and
  42 *                                      the number of skb queueable in the
  43 *                                      dgram receiver.
  44 *              Artur Skawina   :       Hash function optimizations
  45 *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46 *            Malcolm Beattie   :       Set peercred for socketpair
  47 *           Michal Ostrowski   :       Module initialization cleanup.
  48 *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49 *                                      the core infrastructure is doing that
  50 *                                      for all net proto families now (2.5.69+)
  51 *
  52 *
  53 * Known differences from reference BSD that was tested:
  54 *
  55 *      [TO FIX]
  56 *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57 *              other the moment one end closes.
  58 *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59 *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60 *      [NOT TO FIX]
  61 *      accept() returns a path name even if the connecting socket has closed
  62 *              in the meantime (BSD loses the path and gives up).
  63 *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64 *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65 *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66 *      BSD af_unix apparently has connect forgetting to block properly.
  67 *              (need to check this with the POSIX spec in detail)
  68 *
  69 * Differences from 2.0.0-11-... (ANK)
  70 *      Bug fixes and improvements.
  71 *              - client shutdown killed server socket.
  72 *              - removed all useless cli/sti pairs.
  73 *
  74 *      Semantic changes/extensions.
  75 *              - generic control message passing.
  76 *              - SCM_CREDENTIALS control message.
  77 *              - "Abstract" (not FS based) socket bindings.
  78 *                Abstract names are sequences of bytes (not zero terminated)
  79 *                started by 0, so that this name space does not intersect
  80 *                with BSD names.
  81 */
  82
  83#include <linux/module.h>
  84#include <linux/kernel.h>
  85#include <linux/signal.h>
  86#include <linux/sched.h>
  87#include <linux/errno.h>
  88#include <linux/string.h>
  89#include <linux/stat.h>
  90#include <linux/dcache.h>
  91#include <linux/namei.h>
  92#include <linux/socket.h>
  93#include <linux/un.h>
  94#include <linux/fcntl.h>
  95#include <linux/termios.h>
  96#include <linux/sockios.h>
  97#include <linux/net.h>
  98#include <linux/in.h>
  99#include <linux/fs.h>
 100#include <linux/slab.h>
 101#include <asm/uaccess.h>
 102#include <linux/skbuff.h>
 103#include <linux/netdevice.h>
 104#include <net/net_namespace.h>
 105#include <net/sock.h>
 106#include <net/tcp_states.h>
 107#include <net/af_unix.h>
 108#include <linux/proc_fs.h>
 109#include <linux/seq_file.h>
 110#include <net/scm.h>
 111#include <linux/init.h>
 112#include <linux/poll.h>
 113#include <linux/rtnetlink.h>
 114#include <linux/mount.h>
 115#include <net/checksum.h>
 116#include <linux/security.h>
 117
 118struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];    /* socket hash chains; unix_sockets_unbound() hands out slots UNIX_HASH_SIZE and above */
 119EXPORT_SYMBOL_GPL(unix_socket_table);
 120DEFINE_SPINLOCK(unix_table_lock);    /* taken around every lookup/insert/remove on unix_socket_table in this file */
 121EXPORT_SYMBOL_GPL(unix_table_lock);
 122static atomic_long_t unix_nr_socks;  /* live socket count: raised in unix_create1(), lowered in unix_sock_destructor() */
 123
 124
 125static struct hlist_head *unix_sockets_unbound(void *addr)
 126{
 127        unsigned long hash = (unsigned long)addr;
 128
 129        hash ^= hash >> 16;
 130        hash ^= hash >> 8;
 131        hash %= UNIX_HASH_SIZE;
 132        return &unix_socket_table[UNIX_HASH_SIZE + hash];
 133}
 134
 135#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)  /* dereferences addr, so sk must be bound; true when the stored hash is below UNIX_HASH_SIZE */
 136
 #ifndef CONFIG_SECURITY_NETWORK
 /* Without CONFIG_SECURITY_NETWORK both helpers compile to nothing. */
 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 {
 }

 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 {
 }
 #else
 /*
  * Despite the name, this copies the security ID held in @scm into the
  * UNIXSID() slot of @skb.
  */
 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 {
         memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
 }

 /* Reverse direction: load the ID stored in @skb's UNIXSID() slot into @scm. */
 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 {
         scm->secid = *UNIXSID(skb);
 }
 #endif /* CONFIG_SECURITY_NETWORK */
 154
 155/*
 156 *  SMP locking strategy:
 157 *    hash table is protected with spinlock unix_table_lock
 158 *    each socket state is protected by separate spin lock.
 159 */
 160
 161static inline unsigned int unix_hash_fold(__wsum n)
 162{
 163        unsigned int hash = (__force unsigned int)n;
 164
 165        hash ^= hash>>16;
 166        hash ^= hash>>8;
 167        return hash&(UNIX_HASH_SIZE-1);
 168}
 169
 170#define unix_peer(sk) (unix_sk(sk)->peer)    /* lvalue for sk's peer pointer; compared against and reset to NULL elsewhere in this file */
 171
 /* Non-zero when @osk's peer pointer refers back to @sk. */
 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 {
         return sk == unix_peer(osk);
 }
 176
 177static inline int unix_may_send(struct sock *sk, struct sock *osk)
 178{
 179        return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 180}
 181
 182static inline int unix_recvq_full(struct sock const *sk)
 183{
 184        return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 185}
 186
 187struct sock *unix_peer_get(struct sock *s)
 188{
 189        struct sock *peer;
 190
 191        unix_state_lock(s);
 192        peer = unix_peer(s);
 193        if (peer)
 194                sock_hold(peer);
 195        unix_state_unlock(s);
 196        return peer;
 197}
 198EXPORT_SYMBOL_GPL(unix_peer_get);
 199
 200static inline void unix_release_addr(struct unix_address *addr)
 201{
 202        if (atomic_dec_and_test(&addr->refcnt))
 203                kfree(addr);
 204}
 205
 206/*
 207 *      Check unix socket name:
 208 *              - should be not zero length.
 209 *              - if started by not zero, should be NULL terminated (FS object)
 210 *              - if started by zero, it is abstract name.
 211 */
 212
 213static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 214{
 215        if (len <= sizeof(short) || len > sizeof(*sunaddr))
 216                return -EINVAL;
 217        if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 218                return -EINVAL;
 219        if (sunaddr->sun_path[0]) {
 220                /*
 221                 * This may look like an off by one error but it is a bit more
 222                 * subtle. 108 is the longest valid AF_UNIX path for a binding.
 223                 * sun_path[108] doesn't as such exist.  However in kernel space
 224                 * we are guaranteed that it is a valid memory location in our
 225                 * kernel address buffer.
 226                 */
 227                ((char *)sunaddr)[len] = 0;
 228                len = strlen(sunaddr->sun_path)+1+sizeof(short);
 229                return len;
 230        }
 231
 232        *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 233        return len;
 234}
 235
 236static void __unix_remove_socket(struct sock *sk)      /* callers visible in this file hold unix_table_lock */
 237{
 238        sk_del_node_init(sk);   /* presumably unlinks sk from its current hash chain; helper is defined outside this file */
 239}
 240
 241static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)     /* callers visible in this file hold unix_table_lock */
 242{
 243        WARN_ON(!sk_unhashed(sk));      /* warn if sk is still linked somewhere when it is about to be added */
 244        sk_add_node(sk, list);          /* link sk onto the chain headed by list */
 245}
 246
 247static inline void unix_remove_socket(struct sock *sk)  /* locked wrapper around __unix_remove_socket() */
 248{
 249        spin_lock(&unix_table_lock);
 250        __unix_remove_socket(sk);
 251        spin_unlock(&unix_table_lock);
 252}
 253
 254static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) /* locked wrapper around __unix_insert_socket() */
 255{
 256        spin_lock(&unix_table_lock);
 257        __unix_insert_socket(list, sk);
 258        spin_unlock(&unix_table_lock);
 259}
 260
 261static struct sock *__unix_find_socket_byname(struct net *net,
 262                                              struct sockaddr_un *sunname,
 263                                              int len, int type, unsigned int hash)
 264{
 265        struct sock *s;
 266        struct hlist_node *node;
 267
 268        sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
 269                struct unix_sock *u = unix_sk(s);
 270
 271                if (!net_eq(sock_net(s), net))
 272                        continue;
 273
 274                if (u->addr->len == len &&
 275                    !memcmp(u->addr->name, sunname, len))
 276                        goto found;
 277        }
 278        s = NULL;
 279found:
 280        return s;
 281}
 282
 283static inline struct sock *unix_find_socket_byname(struct net *net,
 284                                                   struct sockaddr_un *sunname,
 285                                                   int len, int type,
 286                                                   unsigned int hash)
 287{
 288        struct sock *s;
 289
 290        spin_lock(&unix_table_lock);
 291        s = __unix_find_socket_byname(net, sunname, len, type, hash);
 292        if (s)
 293                sock_hold(s);
 294        spin_unlock(&unix_table_lock);
 295        return s;
 296}
 297
 298static struct sock *unix_find_socket_byinode(struct inode *i)
 299{
 300        struct sock *s;
 301        struct hlist_node *node;
 302
 303        spin_lock(&unix_table_lock);
 304        sk_for_each(s, node,
 305                    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 306                struct dentry *dentry = unix_sk(s)->path.dentry;
 307
 308                if (dentry && dentry->d_inode == i) {
 309                        sock_hold(s);
 310                        goto found;
 311                }
 312        }
 313        s = NULL;
 314found:
 315        spin_unlock(&unix_table_lock);
 316        return s;
 317}
 318
 319static inline int unix_writable(struct sock *sk)
 320{
 321        return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 322}
 323
 324static void unix_write_space(struct sock *sk)
 325{
 326        struct socket_wq *wq;
 327
 328        rcu_read_lock();
 329        if (unix_writable(sk)) {
 330                wq = rcu_dereference(sk->sk_wq);
 331                if (wq_has_sleeper(wq))
 332                        wake_up_interruptible_sync_poll(&wq->wait,
 333                                POLLOUT | POLLWRNORM | POLLWRBAND);
 334                sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 335        }
 336        rcu_read_unlock();
 337}
 338
 339/* When dgram socket disconnects (or changes its peer), we clear its receive
 340 * queue of packets arrived from previous peer. First, it allows to do
 341 * flow control based only on wmem_alloc; second, sk connected to peer
 342 * may receive messages only from that peer. */
 343static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 344{
 345        if (!skb_queue_empty(&sk->sk_receive_queue)) {
 346                skb_queue_purge(&sk->sk_receive_queue);
 347                wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 348
 349                /* If one link of bidirectional dgram pipe is disconnected,
 350                 * we signal error. Messages are lost. Do not make this,
 351                 * when peer was not connected to us.
 352                 */
 353                if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 354                        other->sk_err = ECONNRESET;
 355                        other->sk_error_report(other);
 356                }
 357        }
 358}
 359
 360static void unix_sock_destructor(struct sock *sk)
 361{
 362        struct unix_sock *u = unix_sk(sk);
 363
 364        skb_queue_purge(&sk->sk_receive_queue);
 365
 366        WARN_ON(atomic_read(&sk->sk_wmem_alloc));
 367        WARN_ON(!sk_unhashed(sk));
 368        WARN_ON(sk->sk_socket);
 369        if (!sock_flag(sk, SOCK_DEAD)) {
 370                printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
 371                return;
 372        }
 373
 374        if (u->addr)
 375                unix_release_addr(u->addr);
 376
 377        atomic_long_dec(&unix_nr_socks);
 378        local_bh_disable();
 379        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 380        local_bh_enable();
 381#ifdef UNIX_REFCNT_DEBUG
 382        printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
 383                atomic_long_read(&unix_nr_socks));
 384#endif
 385}
 386
 387static int unix_release_sock(struct sock *sk, int embrion)
 388{
 389        struct unix_sock *u = unix_sk(sk);
 390        struct path path;
 391        struct sock *skpair;
 392        struct sk_buff *skb;
 393        int state;
 394
 395        unix_remove_socket(sk);
 396
 397        /* Clear state */
 398        unix_state_lock(sk);
 399        sock_orphan(sk);
 400        sk->sk_shutdown = SHUTDOWN_MASK;
 401        path         = u->path;
 402        u->path.dentry = NULL;
 403        u->path.mnt = NULL;
 404        state = sk->sk_state;
 405        sk->sk_state = TCP_CLOSE;
 406        unix_state_unlock(sk);
 407
 408        wake_up_interruptible_all(&u->peer_wait);
 409
 410        skpair = unix_peer(sk);
 411
 412        if (skpair != NULL) {
 413                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 414                        unix_state_lock(skpair);
 415                        /* No more writes */
 416                        skpair->sk_shutdown = SHUTDOWN_MASK;
 417                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 418                                skpair->sk_err = ECONNRESET;
 419                        unix_state_unlock(skpair);
 420                        skpair->sk_state_change(skpair);
 421                        sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 422                }
 423                sock_put(skpair); /* It may now die */
 424                unix_peer(sk) = NULL;
 425        }
 426
 427        /* Try to flush out this socket. Throw out buffers at least */
 428
 429        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 430                if (state == TCP_LISTEN)
 431                        unix_release_sock(skb->sk, 1);
 432                /* passed fds are erased in the kfree_skb hook        */
 433                kfree_skb(skb);
 434        }
 435
 436        if (path.dentry)
 437                path_put(&path);
 438
 439        sock_put(sk);
 440
 441        /* ---- Socket is dead now and most probably destroyed ---- */
 442
 443        /*
 444         * Fixme: BSD difference: In BSD all sockets connected to use get
 445         *        ECONNRESET and we die on the spot. In Linux we behave
 446         *        like files and pipes do and wait for the last
 447         *        dereference.
 448         *
 449         * Can't we simply set sock->err?
 450         *
 451         *        What the above comment does talk about? --ANK(980817)
 452         */
 453
 454        if (unix_tot_inflight)
 455                unix_gc();              /* Garbage collect fds */
 456
 457        return 0;
 458}
 459
 460static void init_peercred(struct sock *sk)
 461{
 462        put_pid(sk->sk_peer_pid);
 463        if (sk->sk_peer_cred)
 464                put_cred(sk->sk_peer_cred);
 465        sk->sk_peer_pid  = get_pid(task_tgid(current));
 466        sk->sk_peer_cred = get_current_cred();
 467}
 468
 469static void copy_peercred(struct sock *sk, struct sock *peersk)
 470{
 471        put_pid(sk->sk_peer_pid);
 472        if (sk->sk_peer_cred)
 473                put_cred(sk->sk_peer_cred);
 474        sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 475        sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 476}
 477
 478static int unix_listen(struct socket *sock, int backlog)
 479{
 480        int err;
 481        struct sock *sk = sock->sk;
 482        struct unix_sock *u = unix_sk(sk);
 483        struct pid *old_pid = NULL;
 484        const struct cred *old_cred = NULL;
 485
 486        err = -EOPNOTSUPP;
 487        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 488                goto out;       /* Only stream/seqpacket sockets accept */
 489        err = -EINVAL;
 490        if (!u->addr)
 491                goto out;       /* No listens on an unbound socket */
 492        unix_state_lock(sk);
 493        if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 494                goto out_unlock;
 495        if (backlog > sk->sk_max_ack_backlog)
 496                wake_up_interruptible_all(&u->peer_wait);
 497        sk->sk_max_ack_backlog  = backlog;
 498        sk->sk_state            = TCP_LISTEN;
 499        /* set credentials so connect can copy them */
 500        init_peercred(sk);
 501        err = 0;
 502
 503out_unlock:
 504        unix_state_unlock(sk);
 505        put_pid(old_pid);
 506        if (old_cred)
 507                put_cred(old_cred);
 508out:
 509        return err;
 510}
 511
 512static int unix_release(struct socket *);
 513static int unix_bind(struct socket *, struct sockaddr *, int);
 514static int unix_stream_connect(struct socket *, struct sockaddr *,
 515                               int addr_len, int flags);
 516static int unix_socketpair(struct socket *, struct socket *);
 517static int unix_accept(struct socket *, struct socket *, int);
 518static int unix_getname(struct socket *, struct sockaddr *, int *, int);
 519static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
 520static unsigned int unix_dgram_poll(struct file *, struct socket *,
 521                                    poll_table *);
 522static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 523static int unix_shutdown(struct socket *, int);
 524static int unix_stream_sendmsg(struct kiocb *, struct socket *,
 525                               struct msghdr *, size_t);
 526static int unix_stream_recvmsg(struct kiocb *, struct socket *,
 527                               struct msghdr *, size_t, int);
 528static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
 529                              struct msghdr *, size_t);
 530static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
 531                              struct msghdr *, size_t, int);
 532static int unix_dgram_connect(struct socket *, struct sockaddr *,
 533                              int, int);
 534static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
 535                                  struct msghdr *, size_t);
 536static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
 537                                  struct msghdr *, size_t, int);
 538
 539static void unix_set_peek_off(struct sock *sk, int val)
 540{
 541        struct unix_sock *u = unix_sk(sk);
 542
 543        mutex_lock(&u->readlock);
 544        sk->sk_peek_off = val;
 545        mutex_unlock(&u->readlock);
 546}
 547
 548
 549static const struct proto_ops unix_stream_ops = {
 550        .family =       PF_UNIX,
 551        .owner =        THIS_MODULE,
 552        .release =      unix_release,
 553        .bind =         unix_bind,
 554        .connect =      unix_stream_connect,
 555        .socketpair =   unix_socketpair,
 556        .accept =       unix_accept,
 557        .getname =      unix_getname,
 558        .poll =         unix_poll,
 559        .ioctl =        unix_ioctl,
 560        .listen =       unix_listen,
 561        .shutdown =     unix_shutdown,
 562        .setsockopt =   sock_no_setsockopt,
 563        .getsockopt =   sock_no_getsockopt,
 564        .sendmsg =      unix_stream_sendmsg,
 565        .recvmsg =      unix_stream_recvmsg,
 566        .mmap =         sock_no_mmap,
 567        .sendpage =     sock_no_sendpage,
 568        .set_peek_off = unix_set_peek_off,
 569};
 570
 571static const struct proto_ops unix_dgram_ops = {
 572        .family =       PF_UNIX,
 573        .owner =        THIS_MODULE,
 574        .release =      unix_release,
 575        .bind =         unix_bind,
 576        .connect =      unix_dgram_connect,
 577        .socketpair =   unix_socketpair,
 578        .accept =       sock_no_accept,
 579        .getname =      unix_getname,
 580        .poll =         unix_dgram_poll,
 581        .ioctl =        unix_ioctl,
 582        .listen =       sock_no_listen,
 583        .shutdown =     unix_shutdown,
 584        .setsockopt =   sock_no_setsockopt,
 585        .getsockopt =   sock_no_getsockopt,
 586        .sendmsg =      unix_dgram_sendmsg,
 587        .recvmsg =      unix_dgram_recvmsg,
 588        .mmap =         sock_no_mmap,
 589        .sendpage =     sock_no_sendpage,
 590        .set_peek_off = unix_set_peek_off,
 591};
 592
 593static const struct proto_ops unix_seqpacket_ops = {
 594        .family =       PF_UNIX,
 595        .owner =        THIS_MODULE,
 596        .release =      unix_release,
 597        .bind =         unix_bind,
 598        .connect =      unix_stream_connect,
 599        .socketpair =   unix_socketpair,
 600        .accept =       unix_accept,
 601        .getname =      unix_getname,
 602        .poll =         unix_dgram_poll,
 603        .ioctl =        unix_ioctl,
 604        .listen =       unix_listen,
 605        .shutdown =     unix_shutdown,
 606        .setsockopt =   sock_no_setsockopt,
 607        .getsockopt =   sock_no_getsockopt,
 608        .sendmsg =      unix_seqpacket_sendmsg,
 609        .recvmsg =      unix_seqpacket_recvmsg,
 610        .mmap =         sock_no_mmap,
 611        .sendpage =     sock_no_sendpage,
 612        .set_peek_off = unix_set_peek_off,
 613};
 614
 615static struct proto unix_proto = {
 616        .name                   = "UNIX",
 617        .owner                  = THIS_MODULE,
 618        .obj_size               = sizeof(struct unix_sock),
 619};
 620
 621/*
 622 * AF_UNIX sockets do not interact with hardware, hence they
 623 * don't trigger interrupts - so it's safe for them to have
 624 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 625 * this special lock-class by reinitializing the spinlock key:
 626 */
 627static struct lock_class_key af_unix_sk_receive_queue_lock_key; /* applied in unix_create1() via lockdep_set_class() */
 628
 629static struct sock *unix_create1(struct net *net, struct socket *sock)
 630{
 631        struct sock *sk = NULL;
 632        struct unix_sock *u;
 633
 634        atomic_long_inc(&unix_nr_socks);
 635        if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 636                goto out;
 637
 638        sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
 639        if (!sk)
 640                goto out;
 641
 642        sock_init_data(sock, sk);
 643        lockdep_set_class(&sk->sk_receive_queue.lock,
 644                                &af_unix_sk_receive_queue_lock_key);
 645
 646        sk->sk_write_space      = unix_write_space;
 647        sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 648        sk->sk_destruct         = unix_sock_destructor;
 649        u         = unix_sk(sk);
 650        u->path.dentry = NULL;
 651        u->path.mnt = NULL;
 652        spin_lock_init(&u->lock);
 653        atomic_long_set(&u->inflight, 0);
 654        INIT_LIST_HEAD(&u->link);
 655        mutex_init(&u->readlock); /* single task reading lock */
 656        init_waitqueue_head(&u->peer_wait);
 657        unix_insert_socket(unix_sockets_unbound(sk), sk);
 658out:
 659        if (sk == NULL)
 660                atomic_long_dec(&unix_nr_socks);
 661        else {
 662                local_bh_disable();
 663                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 664                local_bh_enable();
 665        }
 666        return sk;
 667}
 668
 669static int unix_create(struct net *net, struct socket *sock, int protocol,
 670                       int kern)
 671{
 672        if (protocol && protocol != PF_UNIX)
 673                return -EPROTONOSUPPORT;
 674
 675        sock->state = SS_UNCONNECTED;
 676
 677        switch (sock->type) {
 678        case SOCK_STREAM:
 679                sock->ops = &unix_stream_ops;
 680                break;
 681                /*
 682                 *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 683                 *      nothing uses it.
 684                 */
 685        case SOCK_RAW:
 686                sock->type = SOCK_DGRAM;
 687        case SOCK_DGRAM:
 688                sock->ops = &unix_dgram_ops;
 689                break;
 690        case SOCK_SEQPACKET:
 691                sock->ops = &unix_seqpacket_ops;
 692                break;
 693        default:
 694                return -ESOCKTNOSUPPORT;
 695        }
 696
 697        return unix_create1(net, sock) ? 0 : -ENOMEM;
 698}
 699
 700static int unix_release(struct socket *sock)
 701{
 702        struct sock *sk = sock->sk;
 703
 704        if (!sk)
 705                return 0;
 706
 707        sock->sk = NULL;
 708
 709        return unix_release_sock(sk, 0);
 710}
 711
 712static int unix_autobind(struct socket *sock)
 713{
 714        struct sock *sk = sock->sk;
 715        struct net *net = sock_net(sk);
 716        struct unix_sock *u = unix_sk(sk);
 717        static u32 ordernum = 1;
 718        struct unix_address *addr;
 719        int err;
 720        unsigned int retries = 0;
 721
 722        mutex_lock(&u->readlock);
 723
 724        err = 0;
 725        if (u->addr)
 726                goto out;
 727
 728        err = -ENOMEM;
 729        addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 730        if (!addr)
 731                goto out;
 732
 733        addr->name->sun_family = AF_UNIX;
 734        atomic_set(&addr->refcnt, 1);
 735
 736retry:
 737        addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 738        addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 739
 740        spin_lock(&unix_table_lock);
 741        ordernum = (ordernum+1)&0xFFFFF;
 742
 743        if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 744                                      addr->hash)) {
 745                spin_unlock(&unix_table_lock);
 746                /*
 747                 * __unix_find_socket_byname() may take long time if many names
 748                 * are already in use.
 749                 */
 750                cond_resched();
 751                /* Give up if all names seems to be in use. */
 752                if (retries++ == 0xFFFFF) {
 753                        err = -ENOSPC;
 754                        kfree(addr);
 755                        goto out;
 756                }
 757                goto retry;
 758        }
 759        addr->hash ^= sk->sk_type;
 760
 761        __unix_remove_socket(sk);
 762        u->addr = addr;
 763        __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 764        spin_unlock(&unix_table_lock);
 765        err = 0;
 766
 767out:    mutex_unlock(&u->readlock);
 768        return err;
 769}
 770
 /*
  * Resolve the destination socket named by @sunname.
  *
  * A name with a zero first path byte is looked up by name and hash; any
  * other name is resolved as a filesystem path, which must be writable by
  * the caller and refer to a socket inode.  The socket is returned with a
  * reference held.  On failure NULL is returned and *error holds a
  * negative errno: the kern_path()/inode_permission() result,
  * -ECONNREFUSED when nothing is found, or -EPROTOTYPE when a path socket
  * has a different type than @type.
  */
 static struct sock *unix_find_other(struct net *net,
                                     struct sockaddr_un *sunname, int len,
                                     int type, unsigned int hash, int *error)
 {
         struct inode *inode;
         struct path path;
         struct sock *u;
         int err = 0;

         if (!sunname->sun_path[0]) {
                 u = unix_find_socket_byname(net, sunname, len, type, hash);
                 if (!u) {
                         err = -ECONNREFUSED;
                         goto fail;
                 }
                 if (unix_sk(u)->path.dentry)
                         touch_atime(&unix_sk(u)->path);
                 return u;
         }

         err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
         if (err)
                 goto fail;

         inode = path.dentry->d_inode;
         err = inode_permission(inode, MAY_WRITE);
         if (err)
                 goto put_fail;

         err = -ECONNREFUSED;
         if (!S_ISSOCK(inode->i_mode))
                 goto put_fail;

         u = unix_find_socket_byinode(inode);
         if (!u)
                 goto put_fail;

         /* The access time is only touched for a socket of the right type. */
         if (u->sk_type == type)
                 touch_atime(&path);

         path_put(&path);

         if (u->sk_type != type) {
                 err = -EPROTOTYPE;
                 sock_put(u);
                 goto fail;
         }
         return u;

 put_fail:
         path_put(&path);
 fail:
         *error = err;
         return NULL;
 }
 825
 826static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 827{
 828        struct dentry *dentry;
 829        struct path path;
 830        int err = 0;
 831        /*
 832         * Get the parent directory, calculate the hash for last
 833         * component.
 834         */
 835        dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 836        err = PTR_ERR(dentry);
 837        if (IS_ERR(dentry))
 838                return err;
 839
 840        /*
 841         * All right, let's create it.
 842         */
 843        err = security_path_mknod(&path, dentry, mode, 0);
 844        if (!err) {
 845                err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
 846                if (!err) {
 847                        res->mnt = mntget(path.mnt);
 848                        res->dentry = dget(dentry);
 849                }
 850        }
 851        done_path_create(&path, dentry);
 852        return err;
 853}
 854
 855static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 856{
 857        struct sock *sk = sock->sk;
 858        struct net *net = sock_net(sk);
 859        struct unix_sock *u = unix_sk(sk);
 860        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 861        char *sun_path = sunaddr->sun_path;
 862        int err;
 863        unsigned int hash;
 864        struct unix_address *addr;
 865        struct hlist_head *list;
 866
 867        err = -EINVAL;
 868        if (sunaddr->sun_family != AF_UNIX)
 869                goto out;
 870
 871        if (addr_len == sizeof(short)) {
 872                err = unix_autobind(sock);
 873                goto out;
 874        }
 875
 876        err = unix_mkname(sunaddr, addr_len, &hash);
 877        if (err < 0)
 878                goto out;
 879        addr_len = err;
 880
 881        mutex_lock(&u->readlock);
 882
 883        err = -EINVAL;
 884        if (u->addr)
 885                goto out_up;
 886
 887        err = -ENOMEM;
 888        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
 889        if (!addr)
 890                goto out_up;
 891
 892        memcpy(addr->name, sunaddr, addr_len);
 893        addr->len = addr_len;
 894        addr->hash = hash ^ sk->sk_type;
 895        atomic_set(&addr->refcnt, 1);
 896
 897        if (sun_path[0]) {
 898                struct path path;
 899                umode_t mode = S_IFSOCK |
 900                       (SOCK_INODE(sock)->i_mode & ~current_umask());
 901                err = unix_mknod(sun_path, mode, &path);
 902                if (err) {
 903                        if (err == -EEXIST)
 904                                err = -EADDRINUSE;
 905                        unix_release_addr(addr);
 906                        goto out_up;
 907                }
 908                addr->hash = UNIX_HASH_SIZE;
 909                hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1);
 910                spin_lock(&unix_table_lock);
 911                u->path = path;
 912                list = &unix_socket_table[hash];
 913        } else {
 914                spin_lock(&unix_table_lock);
 915                err = -EADDRINUSE;
 916                if (__unix_find_socket_byname(net, sunaddr, addr_len,
 917                                              sk->sk_type, hash)) {
 918                        unix_release_addr(addr);
 919                        goto out_unlock;
 920                }
 921
 922                list = &unix_socket_table[addr->hash];
 923        }
 924
 925        err = 0;
 926        __unix_remove_socket(sk);
 927        u->addr = addr;
 928        __unix_insert_socket(list, sk);
 929
 930out_unlock:
 931        spin_unlock(&unix_table_lock);
 932out_up:
 933        mutex_unlock(&u->readlock);
 934out:
 935        return err;
 936}
 937
 938static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
 939{
 940        if (unlikely(sk1 == sk2) || !sk2) {
 941                unix_state_lock(sk1);
 942                return;
 943        }
 944        if (sk1 < sk2) {
 945                unix_state_lock(sk1);
 946                unix_state_lock_nested(sk2);
 947        } else {
 948                unix_state_lock(sk2);
 949                unix_state_lock_nested(sk1);
 950        }
 951}
 952
 953static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
 954{
 955        if (unlikely(sk1 == sk2) || !sk2) {
 956                unix_state_unlock(sk1);
 957                return;
 958        }
 959        unix_state_unlock(sk1);
 960        unix_state_unlock(sk2);
 961}
 962
 963static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 964                              int alen, int flags)
 965{
 966        struct sock *sk = sock->sk;
 967        struct net *net = sock_net(sk);
 968        struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
 969        struct sock *other;
 970        unsigned int hash;
 971        int err;
 972
 973        if (addr->sa_family != AF_UNSPEC) {
 974                err = unix_mkname(sunaddr, alen, &hash);
 975                if (err < 0)
 976                        goto out;
 977                alen = err;
 978
 979                if (test_bit(SOCK_PASSCRED, &sock->flags) &&
 980                    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
 981                        goto out;
 982
 983restart:
 984                other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
 985                if (!other)
 986                        goto out;
 987
 988                unix_state_double_lock(sk, other);
 989
 990                /* Apparently VFS overslept socket death. Retry. */
 991                if (sock_flag(other, SOCK_DEAD)) {
 992                        unix_state_double_unlock(sk, other);
 993                        sock_put(other);
 994                        goto restart;
 995                }
 996
 997                err = -EPERM;
 998                if (!unix_may_send(sk, other))
 999                        goto out_unlock;
1000
1001                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1002                if (err)
1003                        goto out_unlock;
1004
1005        } else {
1006                /*
1007                 *      1003.1g breaking connected state with AF_UNSPEC
1008                 */
1009                other = NULL;
1010                unix_state_double_lock(sk, other);
1011        }
1012
1013        /*
1014         * If it was connected, reconnect.
1015         */
1016        if (unix_peer(sk)) {
1017                struct sock *old_peer = unix_peer(sk);
1018                unix_peer(sk) = other;
1019                unix_state_double_unlock(sk, other);
1020
1021                if (other != old_peer)
1022                        unix_dgram_disconnected(sk, old_peer);
1023                sock_put(old_peer);
1024        } else {
1025                unix_peer(sk) = other;
1026                unix_state_double_unlock(sk, other);
1027        }
1028        return 0;
1029
1030out_unlock:
1031        unix_state_double_unlock(sk, other);
1032        sock_put(other);
1033out:
1034        return err;
1035}
1036
1037static long unix_wait_for_peer(struct sock *other, long timeo)
1038{
1039        struct unix_sock *u = unix_sk(other);
1040        int sched;
1041        DEFINE_WAIT(wait);
1042
1043        prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1044
1045        sched = !sock_flag(other, SOCK_DEAD) &&
1046                !(other->sk_shutdown & RCV_SHUTDOWN) &&
1047                unix_recvq_full(other);
1048
1049        unix_state_unlock(other);
1050
1051        if (sched)
1052                timeo = schedule_timeout(timeo);
1053
1054        finish_wait(&u->peer_wait, &wait);
1055        return timeo;
1056}
1057
/*
 * unix_stream_connect - connect sk to the listening socket named by uaddr.
 *
 * @sock:     socket being connected
 * @uaddr:    destination address, interpreted as struct sockaddr_un
 * @addr_len: length of @uaddr; replaced by the value unix_mkname() returns
 * @flags:    only O_NONBLOCK is consulted here (it selects the send timeout)
 *
 * A new sock (newsk) and a one-byte skb are allocated up front, then the
 * target is looked up and its state lock taken.  On success newsk is made
 * the peer of sk (and sk the peer of newsk), and the skb is appended to the
 * target's receive queue.
 *
 * Returns 0 on success or a negative errno: -ENOMEM on allocation failure,
 * -ECONNREFUSED when the target is not in TCP_LISTEN or has RCV_SHUTDOWN
 * set, -EAGAIN when its receive queue is full and the timeout is zero,
 * -EISCONN / -EINVAL depending on sk's own state, or whatever
 * unix_mkname(), unix_autobind(), unix_find_other(), sock_intr_errno() or
 * security_unix_stream_connect() report.
 */
1058static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1059                               int addr_len, int flags)
1060{
1061        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1062        struct sock *sk = sock->sk;
1063        struct net *net = sock_net(sk);
1064        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1065        struct sock *newsk = NULL;
1066        struct sock *other = NULL;
1067        struct sk_buff *skb = NULL;
1068        unsigned int hash;
1069        int st;
1070        int err;
1071        long timeo;
1072
            /* A non-negative return from unix_mkname() is the length used below. */
1073        err = unix_mkname(sunaddr, addr_len, &hash);
1074        if (err < 0)
1075                goto out;
1076        addr_len = err;
1077
            /* With SOCK_PASSCRED set and no address yet, autobind first. */
1078        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1079            (err = unix_autobind(sock)) != 0)
1080                goto out;
1081
1082        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1083
1084        /* First of all allocate resources.
1085           If we will make it after state is locked,
1086           we will have to recheck all again in any case.
1087         */
1088
1089        err = -ENOMEM;
1090
1091        /* create new sock for complete connection */
1092        newsk = unix_create1(sock_net(sk), NULL);
1093        if (newsk == NULL)
1094                goto out;
1095
1096        /* Allocate skb for sending to listening sock */
1097        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1098        if (skb == NULL)
1099                goto out;
1100
1101restart:
1102        /*  Find listening sock. */
1103        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1104        if (!other)
1105                goto out;
1106
1107        /* Latch state of peer */
1108        unix_state_lock(other);
1109
1110        /* Apparently VFS overslept socket death. Retry. */
1111        if (sock_flag(other, SOCK_DEAD)) {
1112                unix_state_unlock(other);
1113                sock_put(other);
1114                goto restart;
1115        }
1116
1117        err = -ECONNREFUSED;
1118        if (other->sk_state != TCP_LISTEN)
1119                goto out_unlock;
1120        if (other->sk_shutdown & RCV_SHUTDOWN)
1121                goto out_unlock;
1122
1123        if (unix_recvq_full(other)) {
1124                err = -EAGAIN;
1125                if (!timeo)
1126                        goto out_unlock;
1127
                    /*
                     * unix_wait_for_peer() releases other's state lock, so
                     * the exits below go to "out"/"restart", not "out_unlock".
                     */
1128                timeo = unix_wait_for_peer(other, timeo);
1129
1130                err = sock_intr_errno(timeo);
1131                if (signal_pending(current))
1132                        goto out;
1133                sock_put(other);
1134                goto restart;
1135        }
1136
1137        /* Latch our state.
1138
1139           It is tricky place. We need to grab our state lock and cannot
1140           drop lock on peer. It is dangerous because deadlock is
1141           possible. Connect to self case and simultaneous
1142           attempt to connect are eliminated by checking socket
1143           state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1144           check this before attempt to grab lock.
1145
1146           Well, and we have to recheck the state after socket locked.
1147         */
1148        st = sk->sk_state;
1149
1150        switch (st) {
1151        case TCP_CLOSE:
1152                /* This is ok... continue with connect */
1153                break;
1154        case TCP_ESTABLISHED:
1155                /* Socket is already connected */
1156                err = -EISCONN;
1157                goto out_unlock;
1158        default:
1159                err = -EINVAL;
1160                goto out_unlock;
1161        }
1162
1163        unix_state_lock_nested(sk);
1164
            /* sk's state changed between the unlocked read and the lock: start over. */
1165        if (sk->sk_state != st) {
1166                unix_state_unlock(sk);
1167                unix_state_unlock(other);
1168                sock_put(other);
1169                goto restart;
1170        }
1171
1172        err = security_unix_stream_connect(sk, other, newsk);
1173        if (err) {
1174                unix_state_unlock(sk);
1175                goto out_unlock;
1176        }
1177
1178        /* The way is open! Fastly set all the necessary fields... */
1179
            /* This reference on sk is the one stored in newsk's peer pointer. */
1180        sock_hold(sk);
1181        unix_peer(newsk)        = sk;
1182        newsk->sk_state         = TCP_ESTABLISHED;
1183        newsk->sk_type          = sk->sk_type;
1184        init_peercred(newsk);
1185        newu = unix_sk(newsk);
1186        RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1187        otheru = unix_sk(other);
1188
1189        /* copy address information from listening to new sock*/
1190        if (otheru->addr) {
1191                atomic_inc(&otheru->addr->refcnt);
1192                newu->addr = otheru->addr;
1193        }
1194        if (otheru->path.dentry) {
1195                path_get(&otheru->path);
1196                newu->path = otheru->path;
1197        }
1198
1199        /* Set credentials */
1200        copy_peercred(sk, other);
1201
1202        sock->state     = SS_CONNECTED;
1203        sk->sk_state    = TCP_ESTABLISHED;
            /* This reference on newsk is the one stored in sk's peer pointer below. */
1204        sock_hold(newsk);
1205
1206        smp_mb__after_atomic_inc();     /* sock_hold() does an atomic_inc() */
1207        unix_peer(sk)   = newsk;
1208
1209        unix_state_unlock(sk);
1210
1211        /* take ten and send info to listening sock */
1212        spin_lock(&other->sk_receive_queue.lock);
1213        __skb_queue_tail(&other->sk_receive_queue, skb);
1214        spin_unlock(&other->sk_receive_queue.lock);
1215        unix_state_unlock(other);
1216        other->sk_data_ready(other, 0);
1217        sock_put(other);
1218        return 0;
1219
        /* Error exit while other's state lock is still held. */
1220out_unlock:
1221        if (other)
1222                unix_state_unlock(other);
1223
        /* Common error exit: drop the skb, the unused newsk and the ref on other. */
1224out:
1225        kfree_skb(skb);
1226        if (newsk)
1227                unix_release_sock(newsk, 0);
1228        if (other)
1229                sock_put(other);
1230        return err;
1231}
1232
1233static int unix_socketpair(struct socket *socka, struct socket *sockb)
1234{
1235        struct sock *ska = socka->sk, *skb = sockb->sk;
1236
1237        /* Join our sockets back to back */
1238        sock_hold(ska);
1239        sock_hold(skb);
1240        unix_peer(ska) = skb;
1241        unix_peer(skb) = ska;
1242        init_peercred(ska);
1243        init_peercred(skb);
1244
1245        if (ska->sk_type != SOCK_DGRAM) {
1246                ska->sk_state = TCP_ESTABLISHED;
1247                skb->sk_state = TCP_ESTABLISHED;
1248                socka->state  = SS_CONNECTED;
1249                sockb->state  = SS_CONNECTED;
1250        }
1251        return 0;
1252}
1253
1254static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1255{
1256        struct sock *sk = sock->sk;
1257        struct sock *tsk;
1258        struct sk_buff *skb;
1259        int err;
1260
1261        err = -EOPNOTSUPP;
1262        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1263                goto out;
1264
1265        err = -EINVAL;
1266        if (sk->sk_state != TCP_LISTEN)
1267                goto out;
1268
1269        /* If socket state is TCP_LISTEN it cannot change (for now...),
1270         * so that no locks are necessary.
1271         */
1272
1273        skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1274        if (!skb) {
1275                /* This means receive shutdown. */
1276                if (err == 0)
1277                        err = -EINVAL;
1278                goto out;
1279        }
1280
1281        tsk = skb->sk;
1282        skb_free_datagram(sk, skb);
1283        wake_up_interruptible(&unix_sk(sk)->peer_wait);
1284
1285        /* attach accepted sock to socket */
1286        unix_state_lock(tsk);
1287        newsock->state = SS_CONNECTED;
1288        sock_graft(tsk, newsock);
1289        unix_state_unlock(tsk);
1290        return 0;
1291
1292out:
1293        return err;
1294}
1295
1296
1297static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1298{
1299        struct sock *sk = sock->sk;
1300        struct unix_sock *u;
1301        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1302        int err = 0;
1303
1304        if (peer) {
1305                sk = unix_peer_get(sk);
1306
1307                err = -ENOTCONN;
1308                if (!sk)
1309                        goto out;
1310                err = 0;
1311        } else {
1312                sock_hold(sk);
1313        }
1314
1315        u = unix_sk(sk);
1316        unix_state_lock(sk);
1317        if (!u->addr) {
1318                sunaddr->sun_family = AF_UNIX;
1319                sunaddr->sun_path[0] = 0;
1320                *uaddr_len = sizeof(short);
1321        } else {
1322                struct unix_address *addr = u->addr;
1323
1324                *uaddr_len = addr->len;
1325                memcpy(sunaddr, addr->name, *uaddr_len);
1326        }
1327        unix_state_unlock(sk);
1328        sock_put(sk);
1329out:
1330        return err;
1331}
1332
1333static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1334{
1335        int i;
1336
1337        scm->fp = UNIXCB(skb).fp;
1338        UNIXCB(skb).fp = NULL;
1339
1340        for (i = scm->fp->count-1; i >= 0; i--)
1341                unix_notinflight(scm->fp->fp[i]);
1342}
1343
1344static void unix_destruct_scm(struct sk_buff *skb)
1345{
1346        struct scm_cookie scm;
1347        memset(&scm, 0, sizeof(scm));
1348        scm.pid  = UNIXCB(skb).pid;
1349        scm.cred = UNIXCB(skb).cred;
1350        if (UNIXCB(skb).fp)
1351                unix_detach_fds(&scm, skb);
1352
1353        /* Alas, it calls VFS */
1354        /* So fscking what? fput() had been SMP-safe since the last Summer */
1355        scm_destroy(&scm);
1356        sock_wfree(skb);
1357}
1358
1359#define MAX_RECURSION_LEVEL 4
1360
1361static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1362{
1363        int i;
1364        unsigned char max_level = 0;
1365        int unix_sock_count = 0;
1366
1367        for (i = scm->fp->count - 1; i >= 0; i--) {
1368                struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1369
1370                if (sk) {
1371                        unix_sock_count++;
1372                        max_level = max(max_level,
1373                                        unix_sk(sk)->recursion_level);
1374                }
1375        }
1376        if (unlikely(max_level > MAX_RECURSION_LEVEL))
1377                return -ETOOMANYREFS;
1378
1379        /*
1380         * Need to duplicate file references for the sake of garbage
1381         * collection.  Otherwise a socket in the fps might become a
1382         * candidate for GC while the skb is not yet queued.
1383         */
1384        UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1385        if (!UNIXCB(skb).fp)
1386                return -ENOMEM;
1387
1388        if (unix_sock_count) {
1389                for (i = scm->fp->count - 1; i >= 0; i--)
1390                        unix_inflight(scm->fp->fp[i]);
1391        }
1392        return max_level;
1393}
1394
1395static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1396{
1397        int err = 0;
1398
1399        UNIXCB(skb).pid  = get_pid(scm->pid);
1400        if (scm->cred)
1401                UNIXCB(skb).cred = get_cred(scm->cred);
1402        UNIXCB(skb).fp = NULL;
1403        if (scm->fp && send_fds)
1404                err = unix_attach_fds(scm, skb);
1405
1406        skb->destructor = unix_destruct_scm;
1407        return err;
1408}
1409
1410/*
1411 * Some apps rely on write() giving SCM_CREDENTIALS
1412 * We include credentials if source or destination socket
1413 * asserted SOCK_PASSCRED.
1414 */
1415static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1416                            const struct sock *other)
1417{
1418        if (UNIXCB(skb).cred)
1419                return;
1420        if (test_bit(SOCK_PASSCRED, &sock->flags) ||
1421            !other->sk_socket ||
1422            test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
1423                UNIXCB(skb).pid  = get_pid(task_tgid(current));
1424                UNIXCB(skb).cred = get_current_cred();
1425        }
1426}
1427
1428/*
1429 *      Send AF_UNIX data.
1430 */
1431
/*
 * unix_dgram_sendmsg - queue one datagram on the destination's receive queue.
 *
 * @kiocb: I/O control block; its sock_iocb supplies (or receives) the scm cookie
 * @sock:  sending socket
 * @msg:   message header: optional destination name, iovec and flags
 * @len:   number of payload bytes to send
 *
 * The destination is the address in @msg when msg_namelen is non-zero,
 * otherwise the connected peer.
 *
 * Returns @len on success, and also when sk_filter() rejects the packet.
 * Otherwise returns a negative errno; the values set directly in this
 * function are -EOPNOTSUPP (MSG_OOB), -ENOTCONN, -EMSGSIZE, -ECONNRESET,
 * -EPERM, -ECONNREFUSED, -EPIPE and -EAGAIN.
 */
1432static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1433                              struct msghdr *msg, size_t len)
1434{
1435        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1436        struct sock *sk = sock->sk;
1437        struct net *net = sock_net(sk);
1438        struct unix_sock *u = unix_sk(sk);
1439        struct sockaddr_un *sunaddr = msg->msg_name;
1440        struct sock *other = NULL;
1441        int namelen = 0; /* fake GCC */
1442        int err;
1443        unsigned int hash;
1444        struct sk_buff *skb;
1445        long timeo;
1446        struct scm_cookie tmp_scm;
1447        int max_level;
1448        int data_len = 0;
1449
            /* Fall back to an on-stack scm cookie when the caller supplied none. */
1450        if (NULL == siocb->scm)
1451                siocb->scm = &tmp_scm;
1452        wait_for_unix_gc();
1453        err = scm_send(sock, msg, siocb->scm, false);
1454        if (err < 0)
1455                return err;
1456
1457        err = -EOPNOTSUPP;
1458        if (msg->msg_flags&MSG_OOB)
1459                goto out;
1460
            /*
             * Explicit destination: keep the name for the lookup at "restart".
             * No name: use the connected peer; unix_peer_get() hands back a
             * sock that is released with sock_put() on every exit path.
             */
1461        if (msg->msg_namelen) {
1462                err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1463                if (err < 0)
1464                        goto out;
1465                namelen = err;
1466        } else {
1467                sunaddr = NULL;
1468                err = -ENOTCONN;
1469                other = unix_peer_get(sk);
1470                if (!other)
1471                        goto out;
1472        }
1473
1474        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1475            && (err = unix_autobind(sock)) != 0)
1476                goto out;
1477
1478        err = -EMSGSIZE;
1479        if (len > sk->sk_sndbuf - 32)
1480                goto out;
1481
            /*
             * Anything beyond SKB_MAX_ALLOC is requested as data_len, capped
             * at MAX_SKB_FRAGS pages; the rest is the linear part.
             */
1482        if (len > SKB_MAX_ALLOC)
1483                data_len = min_t(size_t,
1484                                 len - SKB_MAX_ALLOC,
1485                                 MAX_SKB_FRAGS * PAGE_SIZE);
1486
1487        skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1488                                   msg->msg_flags & MSG_DONTWAIT, &err);
1489        if (skb == NULL)
1490                goto out;
1491
            /* A non-negative return is the recursion level of the passed fds. */
1492        err = unix_scm_to_skb(siocb->scm, skb, true);
1493        if (err < 0)
1494                goto out_free;
1495        max_level = err + 1;
1496        unix_get_secdata(siocb->scm, skb);
1497
1498        skb_put(skb, len - data_len);
1499        skb->data_len = data_len;
1500        skb->len = len;
1501        err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, len);
1502        if (err)
1503                goto out_free;
1504
1505        timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1506
1507restart:
            /*
             * other is NULL on the first pass with an explicit address, or
             * after a dead destination was dropped below.
             */
1508        if (!other) {
1509                err = -ECONNRESET;
1510                if (sunaddr == NULL)
1511                        goto out_free;
1512
1513                other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1514                                        hash, &err);
1515                if (other == NULL)
1516                        goto out_free;
1517        }
1518
1519        if (sk_filter(other, skb) < 0) {
1520                /* Toss the packet but do not return any error to the sender */
1521                err = len;
1522                goto out_free;
1523        }
1524
1525        unix_state_lock(other);
1526        err = -EPERM;
1527        if (!unix_may_send(sk, other))
1528                goto out_unlock;
1529
1530        if (sock_flag(other, SOCK_DEAD)) {
1531                /*
1532                 *      Check with 1003.1g - what should
1533                 *      datagram error
1534                 */
1535                unix_state_unlock(other);
1536                sock_put(other);
1537
                    /*
                     * If the dead socket was our connected peer, disconnect
                     * from it and fail with -ECONNREFUSED; the second
                     * sock_put() below follows clearing unix_peer(sk).
                     * Otherwise look the address up again.
                     */
1538                err = 0;
1539                unix_state_lock(sk);
1540                if (unix_peer(sk) == other) {
1541                        unix_peer(sk) = NULL;
1542                        unix_state_unlock(sk);
1543
1544                        unix_dgram_disconnected(sk, other);
1545                        sock_put(other);
1546                        err = -ECONNREFUSED;
1547                } else {
1548                        unix_state_unlock(sk);
1549                }
1550
1551                other = NULL;
1552                if (err)
1553                        goto out_free;
1554                goto restart;
1555        }
1556
1557        err = -EPIPE;
1558        if (other->sk_shutdown & RCV_SHUTDOWN)
1559                goto out_unlock;
1560
1561        if (sk->sk_type != SOCK_SEQPACKET) {
1562                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1563                if (err)
1564                        goto out_unlock;
1565        }
1566
            /* The full-queue check is skipped when the receiver's peer is this socket. */
1567        if (unix_peer(other) != sk && unix_recvq_full(other)) {
1568                if (!timeo) {
1569                        err = -EAGAIN;
1570                        goto out_unlock;
1571                }
1572
                    /*
                     * unix_wait_for_peer() releases other's state lock, hence
                     * "out_free" rather than "out_unlock" below.
                     */
1573                timeo = unix_wait_for_peer(other, timeo);
1574
1575                err = sock_intr_errno(timeo);
1576                if (signal_pending(current))
1577                        goto out_free;
1578
1579                goto restart;
1580        }
1581
1582        if (sock_flag(other, SOCK_RCVTSTAMP))
1583                __net_timestamp(skb);
1584        maybe_add_creds(skb, sock, other);
1585        skb_queue_tail(&other->sk_receive_queue, skb);
1586        if (max_level > unix_sk(other)->recursion_level)
1587                unix_sk(other)->recursion_level = max_level;
1588        unix_state_unlock(other);
1589        other->sk_data_ready(other, len);
1590        sock_put(other);
1591        scm_destroy(siocb->scm);
1592        return len;
1593
        /* Error exits, from most to least state held. */
1594out_unlock:
1595        unix_state_unlock(other);
1596out_free:
1597        kfree_skb(skb);
1598out:
1599        if (other)
1600                sock_put(other);
1601        scm_destroy(siocb->scm);
1602        return err;
1603}
1604
1605
/*
 * unix_stream_sendmsg - send data on a connected AF_UNIX stream socket.
 *
 * @kiocb: I/O control block; its sock_iocb supplies (or receives) the scm cookie
 * @sock:  sending socket
 * @msg:   message header (iovec and flags; a destination name is rejected)
 * @len:   number of payload bytes to send
 *
 * The payload is cut into chunks, each copied into its own skb and
 * appended to the peer's receive queue.
 *
 * Returns the number of bytes queued when that is non-zero, even if a
 * later chunk failed.  Otherwise returns a negative errno; the values set
 * directly here are -EOPNOTSUPP, -EISCONN, -ENOTCONN and -EPIPE.  On the
 * -EPIPE paths SIGPIPE is sent to the current task when nothing was queued
 * and MSG_NOSIGNAL is not set.
 */
1606static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1607                               struct msghdr *msg, size_t len)
1608{
1609        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1610        struct sock *sk = sock->sk;
1611        struct sock *other = NULL;
1612        int err, size;
1613        struct sk_buff *skb;
1614        int sent = 0;
1615        struct scm_cookie tmp_scm;
1616        bool fds_sent = false;
1617        int max_level;
1618
            /* Fall back to an on-stack scm cookie when the caller supplied none. */
1619        if (NULL == siocb->scm)
1620                siocb->scm = &tmp_scm;
1621        wait_for_unix_gc();
1622        err = scm_send(sock, msg, siocb->scm, false);
1623        if (err < 0)
1624                return err;
1625
1626        err = -EOPNOTSUPP;
1627        if (msg->msg_flags&MSG_OOB)
1628                goto out_err;
1629
            /*
             * A destination name is never accepted here.  Without one, send
             * to the connected peer.
             * NOTE(review): unix_peer() is read without taking a reference,
             * unlike the unix_peer_get() used in unix_dgram_sendmsg().
             */
1630        if (msg->msg_namelen) {
1631                err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1632                goto out_err;
1633        } else {
1634                err = -ENOTCONN;
1635                other = unix_peer(sk);
1636                if (!other)
1637                        goto out_err;
1638        }
1639
1640        if (sk->sk_shutdown & SEND_SHUTDOWN)
1641                goto pipe_err;
1642
1643        while (sent < len) {
1644                /*
1645                 *      Optimisation for the fact that under 0.01% of X
1646                 *      messages typically need breaking up.
1647                 */
1648
1649                size = len-sent;
1650
1651                /* Keep two messages in the pipe so it schedules better */
1652                if (size > ((sk->sk_sndbuf >> 1) - 64))
1653                        size = (sk->sk_sndbuf >> 1) - 64;
1654
1655                if (size > SKB_MAX_ALLOC)
1656                        size = SKB_MAX_ALLOC;
1657
1658                /*
1659                 *      Grab a buffer
1660                 */
1661
1662                skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1663                                          &err);
1664
1665                if (skb == NULL)
1666                        goto out_err;
1667
1668                /*
1669                 *      If you pass two values to the sock_alloc_send_skb
1670                 *      it tries to grab the large buffer with GFP_NOFS
1671                 *      (which can fail easily), and if it fails grab the
1672                 *      fallback size buffer which is under a page and will
1673                 *      succeed. [Alan]
1674                 */
                    /* Never copy more than the skb actually has room for. */
1675                size = min_t(int, size, skb_tailroom(skb));
1676
1677
1678                /* Only send the fds in the first buffer */
1679                err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1680                if (err < 0) {
1681                        kfree_skb(skb);
1682                        goto out_err;
1683                }
                    /* A non-negative return is the recursion level of the passed fds. */
1684                max_level = err + 1;
1685                fds_sent = true;
1686
1687                err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1688                if (err) {
1689                        kfree_skb(skb);
1690                        goto out_err;
1691                }
1692
1693                unix_state_lock(other);
1694
                    /* Peer is dead or no longer receiving: -EPIPE (and maybe SIGPIPE). */
1695                if (sock_flag(other, SOCK_DEAD) ||
1696                    (other->sk_shutdown & RCV_SHUTDOWN))
1697                        goto pipe_err_free;
1698
1699                maybe_add_creds(skb, sock, other);
1700                skb_queue_tail(&other->sk_receive_queue, skb);
1701                if (max_level > unix_sk(other)->recursion_level)
1702                        unix_sk(other)->recursion_level = max_level;
1703                unix_state_unlock(other);
1704                other->sk_data_ready(other, size);
1705                sent += size;
1706        }
1707
1708        scm_destroy(siocb->scm);
1709        siocb->scm = NULL;
1710
1711        return sent;
1712
1713pipe_err_free:
1714        unix_state_unlock(other);
1715        kfree_skb(skb);
1716pipe_err:
1717        if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1718                send_sig(SIGPIPE, current, 0);
1719        err = -EPIPE;
1720out_err:
1721        scm_destroy(siocb->scm);
1722        siocb->scm = NULL;
            /* GNU "?:" form: report the bytes already queued, else the error. */
1723        return sent ? : err;
1724}
1725
1726static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1727                                  struct msghdr *msg, size_t len)
1728{
1729        int err;
1730        struct sock *sk = sock->sk;
1731
1732        err = sock_error(sk);
1733        if (err)
1734                return err;
1735
1736        if (sk->sk_state != TCP_ESTABLISHED)
1737                return -ENOTCONN;
1738
1739        if (msg->msg_namelen)
1740                msg->msg_namelen = 0;
1741
1742        return unix_dgram_sendmsg(kiocb, sock, msg, len);
1743}
1744
1745static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
1746                              struct msghdr *msg, size_t size,
1747                              int flags)
1748{
1749        struct sock *sk = sock->sk;
1750
1751        if (sk->sk_state != TCP_ESTABLISHED)
1752                return -ENOTCONN;
1753
1754        return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
1755}
1756
1757static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1758{
1759        struct unix_sock *u = unix_sk(sk);
1760
1761        msg->msg_namelen = 0;
1762        if (u->addr) {
1763                msg->msg_namelen = u->addr->len;
1764                memcpy(msg->msg_name, u->addr->name, u->addr->len);
1765        }
1766}
1767
1768static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1769                              struct msghdr *msg, size_t size,
1770                              int flags)
1771{
1772        struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1773        struct scm_cookie tmp_scm;
1774        struct sock *sk = sock->sk;
1775        struct unix_sock *u = unix_sk(sk);
1776        int noblock = flags & MSG_DONTWAIT;
1777        struct sk_buff *skb;
1778        int err;
1779        int peeked, skip;
1780
1781        err = -EOPNOTSUPP;
1782        if (flags&MSG_OOB)
1783                goto out;
1784
1785        msg->msg_namelen = 0;
1786
1787        err = mutex_lock_interruptible(&u->readlock);
1788        if (err) {
1789                err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
1790                goto out;
1791        }
1792
1793        skip = sk_peek_offset(sk, flags);
1794
1795        skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
1796        if (!skb) {
1797                unix_state_lock(sk);
1798                /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1799                if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1800                    (sk->sk_shutdown & RCV_SHUTDOWN))
1801                        err = 0;
1802                unix_state_unlock(sk);
1803                goto out_unlock;
1804        }
1805
1806        wake_up_interruptible_sync_poll(&u->peer_wait,
1807                                        POLLOUT | POLLWRNORM | POLLWRBAND);
1808
1809        if (msg->msg_name)
1810                unix_copy_addr(msg, skb->sk);
1811
1812        if (size > skb->len - skip)
1813                size = skb->len - skip;
1814        else if (size < skb->len - skip)
1815                msg->msg_flags |= MSG_TRUNC;
1816
1817        err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
1818        if (err)
1819                goto out_free;
1820
1821        if (sock_flag(sk, SOCK_RCVTSTAMP))
1822                __sock_recv_timestamp(msg, sk, skb);
1823
1824        if (!siocb->scm) {
1825                siocb->scm = &tmp_scm;
1826                memset(&tmp_scm, 0, sizeof(tmp_scm));
1827        }
1828        scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1829        unix_set_secdata(siocb->scm, skb);
1830
1831        if (!(flags & MSG_PEEK)) {
1832                if (UNIXCB(skb).fp)
1833                        unix_detach_fds(siocb->scm, skb);
1834
1835                sk_peek_offset_bwd(sk, skb->len);
1836        } else {
1837                /* It is questionable: on PEEK we could:
1838                   - do not return fds - good, but too simple 8)
1839                   - return fds, and do not return them on read (old strategy,
1840                     apparently wrong)
1841                   - clone fds (I chose it for now, it is the most universal
1842                     solution)
1843
1844                   POSIX 1003.1g does not actually define this clearly
1845                   at all. POSIX 1003.1g doesn't define a lot of things
1846                   clearly however!
1847
1848                */
1849
1850                sk_peek_offset_fwd(sk, size);
1851
1852                if (UNIXCB(skb).fp)
1853                        siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1854        }
1855        err = (flags & MSG_TRUNC) ? skb->len - skip : size;
1856
1857        scm_recv(sock, msg, siocb->scm, flags);
1858
1859out_free:
1860        skb_free_datagram(sk, skb);
1861out_unlock:
1862        mutex_unlock(&u->readlock);
1863out:
1864        return err;
1865}
1866
1867/*
1868 *      Sleep until data has arrive. But check for races..
1869 */
1870
1871static long unix_stream_data_wait(struct sock *sk, long timeo)
1872{
1873        DEFINE_WAIT(wait);
1874
1875        unix_state_lock(sk);
1876
1877        for (;;) {
1878                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1879
1880                if (!skb_queue_empty(&sk->sk_receive_queue) ||
1881                    sk->sk_err ||
1882                    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1883                    signal_pending(current) ||
1884                    !timeo)
1885                        break;
1886
1887                set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1888                unix_state_unlock(sk);
1889                timeo = schedule_timeout(timeo);
1890                unix_state_lock(sk);
1891                clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1892        }
1893
1894        finish_wait(sk_sleep(sk), &wait);
1895        unix_state_unlock(sk);
1896        return timeo;
1897}
1898
1899
1900
1901static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1902                               struct msghdr *msg, size_t size,
1903                               int flags)
1904{
1905        struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1906        struct scm_cookie tmp_scm;
1907        struct sock *sk = sock->sk;
1908        struct unix_sock *u = unix_sk(sk);
1909        struct sockaddr_un *sunaddr = msg->msg_name;
1910        int copied = 0;
1911        int check_creds = 0;
1912        int target;
1913        int err = 0;
1914        long timeo;
1915        int skip;
1916
1917        err = -EINVAL;
1918        if (sk->sk_state != TCP_ESTABLISHED)
1919                goto out;
1920
1921        err = -EOPNOTSUPP;
1922        if (flags&MSG_OOB)
1923                goto out;
1924
1925        target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1926        timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1927
1928        msg->msg_namelen = 0;
1929
1930        /* Lock the socket to prevent queue disordering
1931         * while sleeps in memcpy_tomsg
1932         */
1933
1934        if (!siocb->scm) {
1935                siocb->scm = &tmp_scm;
1936                memset(&tmp_scm, 0, sizeof(tmp_scm));
1937        }
1938
1939        err = mutex_lock_interruptible(&u->readlock);
1940        if (err) {
1941                err = sock_intr_errno(timeo);
1942                goto out;
1943        }
1944
1945        skip = sk_peek_offset(sk, flags);
1946
1947        do {
1948                int chunk;
1949                struct sk_buff *skb;
1950
1951                unix_state_lock(sk);
1952                skb = skb_peek(&sk->sk_receive_queue);
1953again:
1954                if (skb == NULL) {
1955                        unix_sk(sk)->recursion_level = 0;
1956                        if (copied >= target)
1957                                goto unlock;
1958
1959                        /*
1960                         *      POSIX 1003.1g mandates this order.
1961                         */
1962
1963                        err = sock_error(sk);
1964                        if (err)
1965                                goto unlock;
1966                        if (sk->sk_shutdown & RCV_SHUTDOWN)
1967                                goto unlock;
1968
1969                        unix_state_unlock(sk);
1970                        err = -EAGAIN;
1971                        if (!timeo)
1972                                break;
1973                        mutex_unlock(&u->readlock);
1974
1975                        timeo = unix_stream_data_wait(sk, timeo);
1976
1977                        if (signal_pending(current)
1978                            ||  mutex_lock_interruptible(&u->readlock)) {
1979                                err = sock_intr_errno(timeo);
1980                                goto out;
1981                        }
1982
1983                        continue;
1984 unlock:
1985                        unix_state_unlock(sk);
1986                        break;
1987                }
1988
1989                if (skip >= skb->len) {
1990                        skip -= skb->len;
1991                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
1992                        goto again;
1993                }
1994
1995                unix_state_unlock(sk);
1996
1997                if (check_creds) {
1998                        /* Never glue messages from different writers */
1999                        if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
2000                            (UNIXCB(skb).cred != siocb->scm->cred))
2001                                break;
2002                } else {
2003                        /* Copy credentials */
2004                        scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
2005                        check_creds = 1;
2006                }
2007
2008                /* Copy address just once */
2009                if (sunaddr) {
2010                        unix_copy_addr(msg, skb->sk);
2011                        sunaddr = NULL;
2012                }
2013
2014                chunk = min_t(unsigned int, skb->len - skip, size);
2015                if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) {
2016                        if (copied == 0)
2017                                copied = -EFAULT;
2018                        break;
2019                }
2020                copied += chunk;
2021                size -= chunk;
2022
2023                /* Mark read part of skb as used */
2024                if (!(flags & MSG_PEEK)) {
2025                        skb_pull(skb, chunk);
2026
2027                        sk_peek_offset_bwd(sk, chunk);
2028
2029                        if (UNIXCB(skb).fp)
2030                                unix_detach_fds(siocb->scm, skb);
2031
2032                        if (skb->len)
2033                                break;
2034
2035                        skb_unlink(skb, &sk->sk_receive_queue);
2036                        consume_skb(skb);
2037
2038                        if (siocb->scm->fp)
2039                                break;
2040                } else {
2041                        /* It is questionable, see note in unix_dgram_recvmsg.
2042                         */
2043                        if (UNIXCB(skb).fp)
2044                                siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2045
2046                        sk_peek_offset_fwd(sk, chunk);
2047
2048                        break;
2049                }
2050        } while (size);
2051
2052        mutex_unlock(&u->readlock);
2053        scm_recv(sock, msg, siocb->scm, flags);
2054out:
2055        return copied ? : err;
2056}
2057
2058static int unix_shutdown(struct socket *sock, int mode)
2059{
2060        struct sock *sk = sock->sk;
2061        struct sock *other;
2062
2063        mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
2064
2065        if (!mode)
2066                return 0;
2067
2068        unix_state_lock(sk);
2069        sk->sk_shutdown |= mode;
2070        other = unix_peer(sk);
2071        if (other)
2072                sock_hold(other);
2073        unix_state_unlock(sk);
2074        sk->sk_state_change(sk);
2075
2076        if (other &&
2077                (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2078
2079                int peer_mode = 0;
2080
2081                if (mode&RCV_SHUTDOWN)
2082                        peer_mode |= SEND_SHUTDOWN;
2083                if (mode&SEND_SHUTDOWN)
2084                        peer_mode |= RCV_SHUTDOWN;
2085                unix_state_lock(other);
2086                other->sk_shutdown |= peer_mode;
2087                unix_state_unlock(other);
2088                other->sk_state_change(other);
2089                if (peer_mode == SHUTDOWN_MASK)
2090                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2091                else if (peer_mode & RCV_SHUTDOWN)
2092                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2093        }
2094        if (other)
2095                sock_put(other);
2096
2097        return 0;
2098}
2099
2100long unix_inq_len(struct sock *sk)
2101{
2102        struct sk_buff *skb;
2103        long amount = 0;
2104
2105        if (sk->sk_state == TCP_LISTEN)
2106                return -EINVAL;
2107
2108        spin_lock(&sk->sk_receive_queue.lock);
2109        if (sk->sk_type == SOCK_STREAM ||
2110            sk->sk_type == SOCK_SEQPACKET) {
2111                skb_queue_walk(&sk->sk_receive_queue, skb)
2112                        amount += skb->len;
2113        } else {
2114                skb = skb_peek(&sk->sk_receive_queue);
2115                if (skb)
2116                        amount = skb->len;
2117        }
2118        spin_unlock(&sk->sk_receive_queue.lock);
2119
2120        return amount;
2121}
2122EXPORT_SYMBOL_GPL(unix_inq_len);
2123
/* Outgoing queue length of @sk, as reported by sk_wmem_alloc_get(). */
long unix_outq_len(struct sock *sk)
{
	long queued = sk_wmem_alloc_get(sk);

	return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2129
2130static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2131{
2132        struct sock *sk = sock->sk;
2133        long amount = 0;
2134        int err;
2135
2136        switch (cmd) {
2137        case SIOCOUTQ:
2138                amount = unix_outq_len(sk);
2139                err = put_user(amount, (int __user *)arg);
2140                break;
2141        case SIOCINQ:
2142                amount = unix_inq_len(sk);
2143                if (amount < 0)
2144                        err = amount;
2145                else
2146                        err = put_user(amount, (int __user *)arg);
2147                break;
2148        default:
2149                err = -ENOIOCTLCMD;
2150                break;
2151        }
2152        return err;
2153}
2154
2155static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2156{
2157        struct sock *sk = sock->sk;
2158        unsigned int mask;
2159
2160        sock_poll_wait(file, sk_sleep(sk), wait);
2161        mask = 0;
2162
2163        /* exceptional events? */
2164        if (sk->sk_err)
2165                mask |= POLLERR;
2166        if (sk->sk_shutdown == SHUTDOWN_MASK)
2167                mask |= POLLHUP;
2168        if (sk->sk_shutdown & RCV_SHUTDOWN)
2169                mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2170
2171        /* readable? */
2172        if (!skb_queue_empty(&sk->sk_receive_queue))
2173                mask |= POLLIN | POLLRDNORM;
2174
2175        /* Connection-based need to check for termination and startup */
2176        if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2177            sk->sk_state == TCP_CLOSE)
2178                mask |= POLLHUP;
2179
2180        /*
2181         * we set writable also when the other side has shut down the
2182         * connection. This prevents stuck sockets.
2183         */
2184        if (unix_writable(sk))
2185                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2186
2187        return mask;
2188}
2189
2190static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2191                                    poll_table *wait)
2192{
2193        struct sock *sk = sock->sk, *other;
2194        unsigned int mask, writable;
2195
2196        sock_poll_wait(file, sk_sleep(sk), wait);
2197        mask = 0;
2198
2199        /* exceptional events? */
2200        if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2201                mask |= POLLERR;
2202        if (sk->sk_shutdown & RCV_SHUTDOWN)
2203                mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2204        if (sk->sk_shutdown == SHUTDOWN_MASK)
2205                mask |= POLLHUP;
2206
2207        /* readable? */
2208        if (!skb_queue_empty(&sk->sk_receive_queue))
2209                mask |= POLLIN | POLLRDNORM;
2210
2211        /* Connection-based need to check for termination and startup */
2212        if (sk->sk_type == SOCK_SEQPACKET) {
2213                if (sk->sk_state == TCP_CLOSE)
2214                        mask |= POLLHUP;
2215                /* connection hasn't started yet? */
2216                if (sk->sk_state == TCP_SYN_SENT)
2217                        return mask;
2218        }
2219
2220        /* No write status requested, avoid expensive OUT tests. */
2221        if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2222                return mask;
2223
2224        writable = unix_writable(sk);
2225        other = unix_peer_get(sk);
2226        if (other) {
2227                if (unix_peer(other) != sk) {
2228                        sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
2229                        if (unix_recvq_full(other))
2230                                writable = 0;
2231                }
2232                sock_put(other);
2233        }
2234
2235        if (writable)
2236                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2237        else
2238                set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2239
2240        return mask;
2241}
2242
2243#ifdef CONFIG_PROC_FS
2244
/*
 * A seq_file position packs a hash bucket index in the bits above
 * BUCKET_SPACE and an offset within that bucket in the bits below it.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Bucket index stored in position p. */
#define get_bucket(p) ((p) >> BUCKET_SPACE)
/* Offset within the bucket stored in position p. */
#define get_offset(p) ((p) & ((1L << BUCKET_SPACE) - 1))
/* Build a position from bucket index bkt and offset off. */
#define set_bucket_offset(bkt, off) ((bkt) << BUCKET_SPACE | (off))
2250
2251static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2252{
2253        unsigned long offset = get_offset(*pos);
2254        unsigned long bucket = get_bucket(*pos);
2255        struct sock *sk;
2256        unsigned long count = 0;
2257
2258        for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2259                if (sock_net(sk) != seq_file_net(seq))
2260                        continue;
2261                if (++count == offset)
2262                        break;
2263        }
2264
2265        return sk;
2266}
2267
2268static struct sock *unix_next_socket(struct seq_file *seq,
2269                                     struct sock *sk,
2270                                     loff_t *pos)
2271{
2272        unsigned long bucket;
2273
2274        while (sk > (struct sock *)SEQ_START_TOKEN) {
2275                sk = sk_next(sk);
2276                if (!sk)
2277                        goto next_bucket;
2278                if (sock_net(sk) == seq_file_net(seq))
2279                        return sk;
2280        }
2281
2282        do {
2283                sk = unix_from_bucket(seq, pos);
2284                if (sk)
2285                        return sk;
2286
2287next_bucket:
2288                bucket = get_bucket(*pos) + 1;
2289                *pos = set_bucket_offset(bucket, 1);
2290        } while (bucket < ARRAY_SIZE(unix_socket_table));
2291
2292        return NULL;
2293}
2294
2295static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2296        __acquires(unix_table_lock)
2297{
2298        spin_lock(&unix_table_lock);
2299
2300        if (!*pos)
2301                return SEQ_START_TOKEN;
2302
2303        if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2304                return NULL;
2305
2306        return unix_next_socket(seq, NULL, pos);
2307}
2308
2309static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2310{
2311        ++*pos;
2312        return unix_next_socket(seq, v, pos);
2313}
2314
2315static void unix_seq_stop(struct seq_file *seq, void *v)
2316        __releases(unix_table_lock)
2317{
2318        spin_unlock(&unix_table_lock);
2319}
2320
2321static int unix_seq_show(struct seq_file *seq, void *v)
2322{
2323
2324        if (v == SEQ_START_TOKEN)
2325                seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2326                         "Inode Path\n");
2327        else {
2328                struct sock *s = v;
2329                struct unix_sock *u = unix_sk(s);
2330                unix_state_lock(s);
2331
2332                seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2333                        s,
2334                        atomic_read(&s->sk_refcnt),
2335                        0,
2336                        s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2337                        s->sk_type,
2338                        s->sk_socket ?
2339                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2340                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2341                        sock_i_ino(s));
2342
2343                if (u->addr) {
2344                        int i, len;
2345                        seq_putc(seq, ' ');
2346
2347                        i = 0;
2348                        len = u->addr->len - sizeof(short);
2349                        if (!UNIX_ABSTRACT(s))
2350                                len--;
2351                        else {
2352                                seq_putc(seq, '@');
2353                                i++;
2354                        }
2355                        for ( ; i < len; i++)
2356                                seq_putc(seq, u->addr->name->sun_path[i]);
2357                }
2358                unix_state_unlock(s);
2359                seq_putc(seq, '\n');
2360        }
2361
2362        return 0;
2363}
2364
2365static const struct seq_operations unix_seq_ops = {
2366        .start  = unix_seq_start,
2367        .next   = unix_seq_next,
2368        .stop   = unix_seq_stop,
2369        .show   = unix_seq_show,
2370};
2371
2372static int unix_seq_open(struct inode *inode, struct file *file)
2373{
2374        return seq_open_net(inode, file, &unix_seq_ops,
2375                            sizeof(struct seq_net_private));
2376}
2377
2378static const struct file_operations unix_seq_fops = {
2379        .owner          = THIS_MODULE,
2380        .open           = unix_seq_open,
2381        .read           = seq_read,
2382        .llseek         = seq_lseek,
2383        .release        = seq_release_net,
2384};
2385
2386#endif
2387
2388static const struct net_proto_family unix_family_ops = {
2389        .family = PF_UNIX,
2390        .create = unix_create,
2391        .owner  = THIS_MODULE,
2392};
2393
2394
2395static int __net_init unix_net_init(struct net *net)
2396{
2397        int error = -ENOMEM;
2398
2399        net->unx.sysctl_max_dgram_qlen = 10;
2400        if (unix_sysctl_register(net))
2401                goto out;
2402
2403#ifdef CONFIG_PROC_FS
2404        if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2405                unix_sysctl_unregister(net);
2406                goto out;
2407        }
2408#endif
2409        error = 0;
2410out:
2411        return error;
2412}
2413
2414static void __net_exit unix_net_exit(struct net *net)
2415{
2416        unix_sysctl_unregister(net);
2417        proc_net_remove(net, "unix");
2418}
2419
2420static struct pernet_operations unix_net_ops = {
2421        .init = unix_net_init,
2422        .exit = unix_net_exit,
2423};
2424
2425static int __init af_unix_init(void)
2426{
2427        int rc = -1;
2428        struct sk_buff *dummy_skb;
2429
2430        BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2431
2432        rc = proto_register(&unix_proto, 1);
2433        if (rc != 0) {
2434                printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2435                       __func__);
2436                goto out;
2437        }
2438
2439        sock_register(&unix_family_ops);
2440        register_pernet_subsys(&unix_net_ops);
2441out:
2442        return rc;
2443}
2444
2445static void __exit af_unix_exit(void)
2446{
2447        sock_unregister(PF_UNIX);
2448        proto_unregister(&unix_proto);
2449        unregister_pernet_subsys(&unix_net_ops);
2450}
2451
2452/* Earlier than device_initcall() so that other drivers invoking
2453   request_module() don't end up in a loop when modprobe tries
2454   to use a UNIX socket. But later than subsys_initcall() because
2455   we depend on stuff initialised there */
2456fs_initcall(af_unix_init);
2457module_exit(af_unix_exit);
2458
2459MODULE_LICENSE("GPL");
2460MODULE_ALIAS_NETPROTO(PF_UNIX);
2461