linux/net/unix/af_unix.c
<<
>>
Prefs
   1/*
   2 * NET4:        Implementation of BSD Unix domain sockets.
   3 *
   4 * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5 *
   6 *              This program is free software; you can redistribute it and/or
   7 *              modify it under the terms of the GNU General Public License
   8 *              as published by the Free Software Foundation; either version
   9 *              2 of the License, or (at your option) any later version.
  10 *
  11 * Fixes:
  12 *              Linus Torvalds  :       Assorted bug cures.
  13 *              Niibe Yutaka    :       async I/O support.
  14 *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15 *              Alan Cox        :       Limit size of allocated blocks.
  16 *              Alan Cox        :       Fixed the stupid socketpair bug.
  17 *              Alan Cox        :       BSD compatibility fine tuning.
  18 *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19 *              Alan Cox        :       Sorted out a proper draft version of
  20 *                                      file descriptor passing hacked up from
  21 *                                      Mike Shaver's work.
  22 *              Marty Leisner   :       Fixes to fd passing
  23 *              Nick Nevin      :       recvmsg bugfix.
  24 *              Alan Cox        :       Started proper garbage collector
  25 *              Heiko EiBfeldt  :       Missing verify_area check
  26 *              Alan Cox        :       Started POSIXisms
  27 *              Andreas Schwab  :       Replace inode by dentry for proper
  28 *                                      reference counting
  29 *              Kirk Petersen   :       Made this a module
  30 *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31 *                                      Lots of bug fixes.
   32 *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
   33 *                                      by above two patches.
  34 *           Andrea Arcangeli   :       If possible we block in connect(2)
  35 *                                      if the max backlog of the listen socket
   36 *                                      has been reached. This won't break
  37 *                                      old apps and it will avoid huge amount
  38 *                                      of socks hashed (this for unix_gc()
  39 *                                      performances reasons).
  40 *                                      Security fix that limits the max
  41 *                                      number of socks to 2*max_files and
  42 *                                      the number of skb queueable in the
  43 *                                      dgram receiver.
  44 *              Artur Skawina   :       Hash function optimizations
  45 *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46 *            Malcolm Beattie   :       Set peercred for socketpair
  47 *           Michal Ostrowski   :       Module initialization cleanup.
  48 *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49 *                                      the core infrastructure is doing that
  50 *                                      for all net proto families now (2.5.69+)
  51 *
  52 *
  53 * Known differences from reference BSD that was tested:
  54 *
  55 *      [TO FIX]
  56 *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57 *              other the moment one end closes.
  58 *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59 *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60 *      [NOT TO FIX]
  61 *      accept() returns a path name even if the connecting socket has closed
  62 *              in the meantime (BSD loses the path and gives up).
  63 *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64 *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65 *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66 *      BSD af_unix apparently has connect forgetting to block properly.
  67 *              (need to check this with the POSIX spec in detail)
  68 *
  69 * Differences from 2.0.0-11-... (ANK)
  70 *      Bug fixes and improvements.
  71 *              - client shutdown killed server socket.
  72 *              - removed all useless cli/sti pairs.
  73 *
  74 *      Semantic changes/extensions.
  75 *              - generic control message passing.
  76 *              - SCM_CREDENTIALS control message.
  77 *              - "Abstract" (not FS based) socket bindings.
  78 *                Abstract names are sequences of bytes (not zero terminated)
  79 *                started by 0, so that this name space does not intersect
  80 *                with BSD names.
  81 */
  82
  83#include <linux/module.h>
  84#include <linux/kernel.h>
  85#include <linux/signal.h>
  86#include <linux/sched.h>
  87#include <linux/errno.h>
  88#include <linux/string.h>
  89#include <linux/stat.h>
  90#include <linux/dcache.h>
  91#include <linux/namei.h>
  92#include <linux/socket.h>
  93#include <linux/un.h>
  94#include <linux/fcntl.h>
  95#include <linux/termios.h>
  96#include <linux/sockios.h>
  97#include <linux/net.h>
  98#include <linux/in.h>
  99#include <linux/fs.h>
 100#include <linux/slab.h>
 101#include <asm/uaccess.h>
 102#include <linux/skbuff.h>
 103#include <linux/netdevice.h>
 104#include <net/net_namespace.h>
 105#include <net/sock.h>
 106#include <net/tcp_states.h>
 107#include <net/af_unix.h>
 108#include <linux/proc_fs.h>
 109#include <linux/seq_file.h>
 110#include <net/scm.h>
 111#include <linux/init.h>
 112#include <linux/poll.h>
 113#include <linux/rtnetlink.h>
 114#include <linux/mount.h>
 115#include <net/checksum.h>
 116#include <linux/security.h>
 117
/*
 * Global hash table of all AF_UNIX sockets.  Bound sockets live in
 * buckets [0, UNIX_HASH_SIZE); the extra bucket at index UNIX_HASH_SIZE
 * holds sockets that have no address yet.
 */
static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
/* Protects unix_socket_table (see "SMP locking strategy" below). */
static DEFINE_SPINLOCK(unix_table_lock);
/* Count of live AF_UNIX socks; capped in unix_create1(). */
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

/* Overflow bucket for sockets not yet bound to any name. */
#define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])

/* True if sk is bound to an abstract (non-filesystem) name: filesystem
 * bindings store the sentinel UNIX_HASH_SIZE in addr->hash (see unix_bind). */
#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
 125
#ifdef CONFIG_SECURITY_NETWORK
/*
 * Store the sender's LSM security ID from the scm cookie into the skb.
 * NOTE(review): despite the name, this function *writes* into the skb
 * ("get" the data from the scm); its counterpart below reads it back.
 */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Retrieve the security ID carried in the skb into the receiver's scm. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* No-op stubs when network security hooks are compiled out. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */
 143
 144/*
 145 *  SMP locking strategy:
 146 *    hash table is protected with spinlock unix_table_lock
 147 *    each socket state is protected by separate rwlock.
 148 */
 149
 150static inline unsigned unix_hash_fold(__wsum n)
 151{
 152        unsigned hash = (__force unsigned)n;
 153        hash ^= hash>>16;
 154        hash ^= hash>>8;
 155        return hash&(UNIX_HASH_SIZE-1);
 156}
 157
/* The connected peer of an AF_UNIX socket (NULL if unconnected). */
#define unix_peer(sk) (unix_sk(sk)->peer)
 159
 160static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 161{
 162        return unix_peer(osk) == sk;
 163}
 164
 165static inline int unix_may_send(struct sock *sk, struct sock *osk)
 166{
 167        return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 168}
 169
 170static inline int unix_recvq_full(struct sock const *sk)
 171{
 172        return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 173}
 174
/*
 * Return the peer of @s with an extra reference held, or NULL.
 * Reading the peer pointer and taking the reference happen under the
 * socket state lock, so they are atomic w.r.t. disconnects.
 * Caller must sock_put() the result.
 */
static struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
 186
 187static inline void unix_release_addr(struct unix_address *addr)
 188{
 189        if (atomic_dec_and_test(&addr->refcnt))
 190                kfree(addr);
 191}
 192
 193/*
 194 *      Check unix socket name:
 195 *              - should be not zero length.
 196 *              - if started by not zero, should be NULL terminated (FS object)
 197 *              - if started by zero, it is abstract name.
 198 */
 199
/*
 * Validate and canonicalize a sockaddr_un of @len bytes.
 * For filesystem names: NUL-terminate the path in place and return the
 * effective length.  For abstract names: compute the name hash into
 * *hashp and return @len unchanged.  Returns -EINVAL on malformed input.
 * Note *hashp is only written on the abstract-name path.
 */
static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
{
	/* Must hold more than the family field and fit in sockaddr_un. */
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	/* Abstract name: hash the raw bytes (they may contain NULs). */
	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
 222
/* Unhash @sk from its bucket; caller holds unix_table_lock. */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
 227
/* Add @sk to hash bucket @list; caller holds unix_table_lock and
 * @sk must not already be hashed. */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}
 233
/* Locked wrapper: remove @sk from the global hash. */
static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}
 240
/* Locked wrapper: insert @sk into hash bucket @list. */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}
 247
/*
 * Look up a bound socket by name within @net.  @hash is the folded
 * name hash; it is XORed with the socket type so that e.g. stream and
 * dgram sockets bound to the same name land in different buckets.
 * Caller holds unix_table_lock; no reference is taken on the result.
 */
static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned hash)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		/* Exact byte-wise match of the whole address. */
		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}
 269
/*
 * Locked wrapper around __unix_find_socket_byname() that also takes a
 * reference on the match.  Returns NULL when no socket is found.
 */
static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}
 284
/*
 * Look up a filesystem-bound socket by the inode of its socket file.
 * Filesystem sockets are hashed by inode number (see unix_bind()).
 * Takes a reference on the match; returns NULL if not found in @net.
 */
static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
{
	struct sock *s;
	struct hlist_node *node;

	spin_lock(&unix_table_lock);
	sk_for_each(s, node,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->dentry;

		if (!net_eq(sock_net(s), net))
			continue;

		/* Sockets with no dentry (abstract/unbound) never match. */
		if (dentry && dentry->d_inode == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}
 308
 309static inline int unix_writable(struct sock *sk)
 310{
 311        return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 312}
 313
/*
 * sk->sk_write_space callback: when enough write memory has drained
 * to make the socket writable again, wake poll/select sleepers and
 * notify async (SIGIO) waiters.
 */
static void unix_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (unix_writable(sk)) {
		if (sk_has_sleeper(sk))
			wake_up_interruptible_sync(sk->sk_sleep);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	read_unlock(&sk->sk_callback_lock);
}
 324
 325/* When dgram socket disconnects (or changes its peer), we clear its receive
 326 * queue of packets arrived from previous peer. First, it allows to do
 327 * flow control based only on wmem_alloc; second, sk connected to peer
 328 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		/* Senders blocked on our previously-full queue must
		 * re-evaluate now that it has been emptied. */
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not make this,
		 * when peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}
 345
/*
 * sk->sk_destruct hook: final teardown when the last reference to the
 * sock is dropped.  Purges queued skbs, releases the bound address and
 * updates the global and per-net socket accounting.
 */
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	/* Sanity: no pending write memory, unhashed, detached from
	 * its struct socket, and marked dead. */
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
		atomic_read(&unix_nr_socks));
#endif
}
 372
/*
 * Core of close(2) for an AF_UNIX socket: unhash it, orphan it, tell
 * the peer, throw away pending data and drop the final reference.
 * @embrion is non-zero for sockets destroyed while still embryonic on
 * a listener's accept queue; the peer then always sees ECONNRESET.
 */
static int unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct dentry *dentry;
	struct vfsmount *mnt;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	/* Detach the filesystem references under the lock; they are
	 * actually dropped below, outside the lock. */
	dentry       = u->dentry;
	u->dentry    = NULL;
	mnt          = u->mnt;
	u->mnt       = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			/* Unread data (or an embryonic death) means the
			 * peer loses messages: report ECONNRESET. */
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			read_lock(&skpair->sk_callback_lock);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
			read_unlock(&skpair->sk_callback_lock);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* A listener's queue holds embryonic sockets, one per
		 * skb; release each of those recursively. */
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook        */
		kfree_skb(skb);
	}

	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to use get
	 *        ECONNRESET and we die on the spot. In Linux we behave
	 *        like files and pipes do and wait for the last
	 *        dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *        What the above comment does talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */

	return 0;
}
 451
/*
 * listen(2): move a bound stream/seqpacket socket to TCP_LISTEN and
 * record the backlog.  Also snapshots the listener's credentials so
 * that connecting peers can copy them (SO_PEERCRED).
 */
static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	/* Backlog grew: connectors blocked on a full queue may proceed. */
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	sk->sk_peercred.pid	= task_tgid_vnr(current);
	current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
out:
	return err;
}
 481
 482static int unix_release(struct socket *);
 483static int unix_bind(struct socket *, struct sockaddr *, int);
 484static int unix_stream_connect(struct socket *, struct sockaddr *,
 485                               int addr_len, int flags);
 486static int unix_socketpair(struct socket *, struct socket *);
 487static int unix_accept(struct socket *, struct socket *, int);
 488static int unix_getname(struct socket *, struct sockaddr *, int *, int);
 489static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
 490static unsigned int unix_dgram_poll(struct file *, struct socket *,
 491                                    poll_table *);
 492static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 493static int unix_shutdown(struct socket *, int);
 494static int unix_stream_sendmsg(struct kiocb *, struct socket *,
 495                               struct msghdr *, size_t);
 496static int unix_stream_recvmsg(struct kiocb *, struct socket *,
 497                               struct msghdr *, size_t, int);
 498static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
 499                              struct msghdr *, size_t);
 500static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
 501                              struct msghdr *, size_t, int);
 502static int unix_dgram_connect(struct socket *, struct sockaddr *,
 503                              int, int);
 504static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
 505                                  struct msghdr *, size_t);
 506
/* Socket operations for connection-oriented SOCK_STREAM sockets. */
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
 527
/* Socket operations for connectionless SOCK_DGRAM sockets:
 * no accept/listen, datagram poll and send/recv paths. */
static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
 548
/* Socket operations for SOCK_SEQPACKET: connection-oriented like
 * stream (same connect/accept/listen), but uses the datagram poll
 * and recvmsg paths to preserve message boundaries. */
static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
 569
/* Protocol descriptor: tells sk_alloc() to size for struct unix_sock. */
static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};
 575
 576/*
 577 * AF_UNIX sockets do not interact with hardware, hence they
 578 * dont trigger interrupts - so it's safe for them to have
 579 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 580 * this special lock-class by reinitializing the spinlock key:
 581 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key; /* see comment above */
 583
/*
 * Allocate and initialize one AF_UNIX sock and insert it into the
 * unbound hash bucket.  Returns NULL on failure (socket limit hit
 * or allocation failure).
 * NOTE(review): the inc-then-test limit check is racy; concurrent
 * creators may briefly exceed 2*get_max_files() — the cap is only
 * approximate.
 */
static struct sock *unix_create1(struct net *net, struct socket *sock)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_inc(&unix_nr_socks);
	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	/* Re-key the receive-queue lock into its own lockdep class
	 * (AF_UNIX never touches it from irq context; see above). */
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->dentry = NULL;
	u->mnt	  = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	unix_insert_socket(unix_sockets_unbound, sk);
out:
	if (sk == NULL)
		atomic_dec(&unix_nr_socks);	/* undo the optimistic inc */
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}
 623
/*
 * socket(AF_UNIX, type, protocol) handler: select the proto_ops
 * matching @type and allocate the underlying sock.
 * Returns -EPROTONOSUPPORT for protocols other than 0/PF_UNIX and
 * -ESOCKTNOSUPPORT for unsupported types.
 */
static int unix_create(struct net *net, struct socket *sock, int protocol)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through - SOCK_RAW is silently treated as SOCK_DGRAM */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock) ? 0 : -ENOMEM;
}
 653
 654static int unix_release(struct socket *sock)
 655{
 656        struct sock *sk = sock->sk;
 657
 658        if (!sk)
 659                return 0;
 660
 661        sock->sk = NULL;
 662
 663        return unix_release_sock(sk, 0);
 664}
 665
/*
 * Bind an unbound socket to an automatically chosen abstract name of
 * the form "\0XXXXX" (five hex digits from a global counter), as done
 * when sending/connecting from an unbound socket.
 * NOTE(review): if all 2^20 candidate names are in use the retry loop
 * never terminates; later kernels bound the number of retries —
 * confirm this is acceptable here.
 */
static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;

	mutex_lock(&u->readlock);

	err = 0;
	if (u->addr)
		goto out;	/* already bound: nothing to do */

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	/* sun_path[0] stays 0 (kzalloc) => abstract namespace. */
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/* Sanity yield. It is unusual case, but yet... */
		if (!(ordernum&0xFF))
			yield();
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	/* Move from the unbound bucket to the new name's bucket. */
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}
 715
/*
 * Resolve a connect/sendto destination address to a socket.
 * Filesystem names go through a path lookup plus a write-permission
 * check and are matched by inode; abstract names are matched by name
 * hash.  On success returns the socket with a reference held; on
 * failure returns NULL and stores the error code in *error.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = path.dentry->d_inode;
		/* Sending requires write permission on the socket file. */
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(net, inode);
		if (!u)
			goto put_fail;

		/* Only mark the file accessed when the types match. */
		if (u->sk_type == type)
			touch_atime(path.mnt, path.dentry);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		/* Abstract namespace: look up by hashed name. */
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->dentry;
			if (dentry)
				touch_atime(unix_sk(u)->mnt, dentry);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
 770
 771
 772static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 773{
 774        struct sock *sk = sock->sk;
 775        struct net *net = sock_net(sk);
 776        struct unix_sock *u = unix_sk(sk);
 777        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 778        struct dentry *dentry = NULL;
 779        struct nameidata nd;
 780        int err;
 781        unsigned hash;
 782        struct unix_address *addr;
 783        struct hlist_head *list;
 784
 785        err = -EINVAL;
 786        if (sunaddr->sun_family != AF_UNIX)
 787                goto out;
 788
 789        if (addr_len == sizeof(short)) {
 790                err = unix_autobind(sock);
 791                goto out;
 792        }
 793
 794        err = unix_mkname(sunaddr, addr_len, &hash);
 795        if (err < 0)
 796                goto out;
 797        addr_len = err;
 798
 799        mutex_lock(&u->readlock);
 800
 801        err = -EINVAL;
 802        if (u->addr)
 803                goto out_up;
 804
 805        err = -ENOMEM;
 806        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
 807        if (!addr)
 808                goto out_up;
 809
 810        memcpy(addr->name, sunaddr, addr_len);
 811        addr->len = addr_len;
 812        addr->hash = hash ^ sk->sk_type;
 813        atomic_set(&addr->refcnt, 1);
 814
 815        if (sunaddr->sun_path[0]) {
 816                unsigned int mode;
 817                err = 0;
 818                /*
 819                 * Get the parent directory, calculate the hash for last
 820                 * component.
 821                 */
 822                err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
 823                if (err)
 824                        goto out_mknod_parent;
 825
 826                dentry = lookup_create(&nd, 0);
 827                err = PTR_ERR(dentry);
 828                if (IS_ERR(dentry))
 829                        goto out_mknod_unlock;
 830
 831                /*
 832                 * All right, let's create it.
 833                 */
 834                mode = S_IFSOCK |
 835                       (SOCK_INODE(sock)->i_mode & ~current_umask());
 836                err = mnt_want_write(nd.path.mnt);
 837                if (err)
 838                        goto out_mknod_dput;
 839                err = security_path_mknod(&nd.path, dentry, mode, 0);
 840                if (err)
 841                        goto out_mknod_drop_write;
 842                err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
 843out_mknod_drop_write:
 844                mnt_drop_write(nd.path.mnt);
 845                if (err)
 846                        goto out_mknod_dput;
 847                mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 848                dput(nd.path.dentry);
 849                nd.path.dentry = dentry;
 850
 851                addr->hash = UNIX_HASH_SIZE;
 852        }
 853
 854        spin_lock(&unix_table_lock);
 855
 856        if (!sunaddr->sun_path[0]) {
 857                err = -EADDRINUSE;
 858                if (__unix_find_socket_byname(net, sunaddr, addr_len,
 859                                              sk->sk_type, hash)) {
 860                        unix_release_addr(addr);
 861                        goto out_unlock;
 862                }
 863
 864                list = &unix_socket_table[addr->hash];
 865        } else {
 866                list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
 867                u->dentry = nd.path.dentry;
 868                u->mnt    = nd.path.mnt;
 869        }
 870
 871        err = 0;
 872        __unix_remove_socket(sk);
 873        u->addr = addr;
 874        __unix_insert_socket(list, sk);
 875
 876out_unlock:
 877        spin_unlock(&unix_table_lock);
 878out_up:
 879        mutex_unlock(&u->readlock);
 880out:
 881        return err;
 882
 883out_mknod_dput:
 884        dput(dentry);
 885out_mknod_unlock:
 886        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 887        path_put(&nd.path);
 888out_mknod_parent:
 889        if (err == -EEXIST)
 890                err = -EADDRINUSE;
 891        unix_release_addr(addr);
 892        goto out_up;
 893}
 894
 895static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
 896{
 897        if (unlikely(sk1 == sk2) || !sk2) {
 898                unix_state_lock(sk1);
 899                return;
 900        }
 901        if (sk1 < sk2) {
 902                unix_state_lock(sk1);
 903                unix_state_lock_nested(sk2);
 904        } else {
 905                unix_state_lock(sk2);
 906                unix_state_lock_nested(sk1);
 907        }
 908}
 909
 910static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
 911{
 912        if (unlikely(sk1 == sk2) || !sk2) {
 913                unix_state_unlock(sk1);
 914                return;
 915        }
 916        unix_state_unlock(sk1);
 917        unix_state_unlock(sk2);
 918}
 919
/*
 * connect() for SOCK_DGRAM: record the socket found under @addr as the
 * default peer.  AF_UNSPEC disconnects (1003.1g).  Both sockets' state
 * locks are taken together via unix_state_double_lock() so the peer
 * switch is atomic with respect to senders.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* SOCK_PASSCRED receivers need a sender address: autobind. */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);

		/* Wake anyone still waiting to send to the old peer. */
		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
 993
 994static long unix_wait_for_peer(struct sock *other, long timeo)
 995{
 996        struct unix_sock *u = unix_sk(other);
 997        int sched;
 998        DEFINE_WAIT(wait);
 999
1000        prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1001
1002        sched = !sock_flag(other, SOCK_DEAD) &&
1003                !(other->sk_shutdown & RCV_SHUTDOWN) &&
1004                unix_recvq_full(other);
1005
1006        unix_state_unlock(other);
1007
1008        if (sched)
1009                timeo = schedule_timeout(timeo);
1010
1011        finish_wait(&u->peer_wait, &wait);
1012        return timeo;
1013}
1014
/*
 * connect() for SOCK_STREAM/SOCK_SEQPACKET: hand-shake with a listening
 * socket by queueing a freshly created server-side sock on its accept
 * queue.  The new sock and its carrier skb are allocated up front so
 * the locked section cannot fail on allocation.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* SOCK_PASSCRED needs a local address to pass: autobind one. */
	if (test_bit(SOCK_PASSCRED, &sock->flags)
		&& !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		/* Accept queue full: fail if non-blocking, else wait. */
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab write lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	/* State changed while we slept/locked: start over. */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	newsk->sk_peercred.pid	= task_tgid_vnr(current);
	current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
	newu = unix_sk(newsk);
	newsk->sk_sleep		= &newu->peer_wait;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	sk->sk_peercred = other->sk_peercred;

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1190
1191static int unix_socketpair(struct socket *socka, struct socket *sockb)
1192{
1193        struct sock *ska = socka->sk, *skb = sockb->sk;
1194
1195        /* Join our sockets back to back */
1196        sock_hold(ska);
1197        sock_hold(skb);
1198        unix_peer(ska) = skb;
1199        unix_peer(skb) = ska;
1200        ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1201        current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
1202        ska->sk_peercred.uid = skb->sk_peercred.uid;
1203        ska->sk_peercred.gid = skb->sk_peercred.gid;
1204
1205        if (ska->sk_type != SOCK_DGRAM) {
1206                ska->sk_state = TCP_ESTABLISHED;
1207                skb->sk_state = TCP_ESTABLISHED;
1208                socka->state  = SS_CONNECTED;
1209                sockb->state  = SS_CONNECTED;
1210        }
1211        return 0;
1212}
1213
1214static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1215{
1216        struct sock *sk = sock->sk;
1217        struct sock *tsk;
1218        struct sk_buff *skb;
1219        int err;
1220
1221        err = -EOPNOTSUPP;
1222        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1223                goto out;
1224
1225        err = -EINVAL;
1226        if (sk->sk_state != TCP_LISTEN)
1227                goto out;
1228
1229        /* If socket state is TCP_LISTEN it cannot change (for now...),
1230         * so that no locks are necessary.
1231         */
1232
1233        skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1234        if (!skb) {
1235                /* This means receive shutdown. */
1236                if (err == 0)
1237                        err = -EINVAL;
1238                goto out;
1239        }
1240
1241        tsk = skb->sk;
1242        skb_free_datagram(sk, skb);
1243        wake_up_interruptible(&unix_sk(sk)->peer_wait);
1244
1245        /* attach accepted sock to socket */
1246        unix_state_lock(tsk);
1247        newsock->state = SS_CONNECTED;
1248        sock_graft(tsk, newsock);
1249        unix_state_unlock(tsk);
1250        return 0;
1251
1252out:
1253        return err;
1254}
1255
1256
1257static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1258{
1259        struct sock *sk = sock->sk;
1260        struct unix_sock *u;
1261        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1262        int err = 0;
1263
1264        if (peer) {
1265                sk = unix_peer_get(sk);
1266
1267                err = -ENOTCONN;
1268                if (!sk)
1269                        goto out;
1270                err = 0;
1271        } else {
1272                sock_hold(sk);
1273        }
1274
1275        u = unix_sk(sk);
1276        unix_state_lock(sk);
1277        if (!u->addr) {
1278                sunaddr->sun_family = AF_UNIX;
1279                sunaddr->sun_path[0] = 0;
1280                *uaddr_len = sizeof(short);
1281        } else {
1282                struct unix_address *addr = u->addr;
1283
1284                *uaddr_len = addr->len;
1285                memcpy(sunaddr, addr->name, *uaddr_len);
1286        }
1287        unix_state_unlock(sk);
1288        sock_put(sk);
1289out:
1290        return err;
1291}
1292
1293static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1294{
1295        int i;
1296
1297        scm->fp = UNIXCB(skb).fp;
1298        skb->destructor = sock_wfree;
1299        UNIXCB(skb).fp = NULL;
1300
1301        for (i = scm->fp->count-1; i >= 0; i--)
1302                unix_notinflight(scm->fp->fp[i]);
1303}
1304
1305static void unix_destruct_fds(struct sk_buff *skb)
1306{
1307        struct scm_cookie scm;
1308        memset(&scm, 0, sizeof(scm));
1309        unix_detach_fds(&scm, skb);
1310
1311        /* Alas, it calls VFS */
1312        /* So fscking what? fput() had been SMP-safe since the last Summer */
1313        scm_destroy(&scm);
1314        sock_wfree(skb);
1315}
1316
1317static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1318{
1319        int i;
1320
1321        /*
1322         * Need to duplicate file references for the sake of garbage
1323         * collection.  Otherwise a socket in the fps might become a
1324         * candidate for GC while the skb is not yet queued.
1325         */
1326        UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1327        if (!UNIXCB(skb).fp)
1328                return -ENOMEM;
1329
1330        for (i = scm->fp->count-1; i >= 0; i--)
1331                unix_inflight(scm->fp->fp[i]);
1332        skb->destructor = unix_destruct_fds;
1333        return 0;
1334}
1335
1336/*
1337 *      Send AF_UNIX data.
1338 */
1339
/*
 * Send a datagram (SOCK_DGRAM; also reached for SOCK_SEQPACKET data via
 * unix_seqpacket_sendmsg).  The destination is msg_name, or the
 * connected peer when no name is given.  Sender credentials, passed
 * fds and security data ride in the skb's control block.
 * Returns the byte count sent or a negative errno.
 */
static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	/* Let a pending fd garbage collection finish before queueing. */
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	/* SOCK_PASSCRED receivers need a sender address: autobind one. */
	if (test_bit(SOCK_PASSCRED, &sock->flags)
		&& !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out;

	/* Stash sender credentials, fds and security data in the skb. */
	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
	if (siocb->scm->fp) {
		err = unix_attach_fds(siocb->scm, skb);
		if (err)
			goto out_free;
	}
	unix_get_secdata(siocb->scm, skb);

	skb_reset_transport_header(skb);
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	unix_state_lock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (sock_flag(other, SOCK_DEAD)) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		/* Drop a stale default peer; retry by name if possible. */
		err = 0;
		unix_state_lock(sk);
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* Receiver queue full: block, or -EAGAIN when non-blocking. */
	if (unix_peer(other) != sk && unix_recvq_full(other)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		goto restart;
	}

	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}
1493
1494
/*
 * Send on a connected SOCK_STREAM socket.  Data is chopped into skbs
 * bounded by half the send buffer and SKB_MAX_ALLOC; any passed fds go
 * only in the first buffer.  Returns bytes sent, or a negative errno
 * only when nothing was sent.
 */
static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	struct sockaddr_un *sunaddr = msg->msg_name;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie tmp_scm;
	bool fds_sent = false;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	/* Let a pending fd garbage collection finish before queueing. */
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		/* Stream sockets take no destination address. */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 *	Optimisation for the fact that under 0.01% of X
		 *	messages typically need breaking up.
		 */

		size = len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */

		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
					  &err);

		if (skb == NULL)
			goto out_err;

		/*
		 *	If you pass two values to the sock_alloc_send_skb
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if it fails grab the
		 *	fallback size buffer which is under a page and will
		 *	succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));

		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
		/* Only send the fds in the first buffer */
		if (siocb->scm->fp && !fds_sent) {
			err = unix_attach_fds(siocb->scm, skb);
			if (err) {
				kfree_skb(skb);
				goto out_err;
			}
			fds_sent = true;
		}

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		/* Receiver died or shut down reading: SIGPIPE/-EPIPE. */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	return sent ? : err;
}
1613
1614static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1615                                  struct msghdr *msg, size_t len)
1616{
1617        int err;
1618        struct sock *sk = sock->sk;
1619
1620        err = sock_error(sk);
1621        if (err)
1622                return err;
1623
1624        if (sk->sk_state != TCP_ESTABLISHED)
1625                return -ENOTCONN;
1626
1627        if (msg->msg_namelen)
1628                msg->msg_namelen = 0;
1629
1630        return unix_dgram_sendmsg(kiocb, sock, msg, len);
1631}
1632
1633static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1634{
1635        struct unix_sock *u = unix_sk(sk);
1636
1637        msg->msg_namelen = 0;
1638        if (u->addr) {
1639                msg->msg_namelen = u->addr->len;
1640                memcpy(msg->msg_name, u->addr->name, u->addr->len);
1641        }
1642}
1643
/*
 * Receive one datagram.  u->readlock serializes readers so queued
 * messages are handed out in order.  On MSG_PEEK any attached fds are
 * cloned rather than consumed.  Returns the number of bytes copied
 * (the datagram size, truncated to @size) or a negative errno.
 */
static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	msg->msg_namelen = 0;

	mutex_lock(&u->readlock);

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* A queue slot opened up: wake senders blocked on us. */
	wake_up_interruptible_sync(&u->peer_wait);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len)
		size = skb->len;
	else if (size < skb->len)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
	if (err)
		goto out_free;

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	siocb->scm->creds = *UNIXCREDS(skb);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/
		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}
1726
/*
 *	Sleep until data has arrived. But check for races..
 */
1730
/*
 * Wait until the stream socket has data queued, hits an error or
 * shutdown, a signal is pending, or @timeo expires.  Takes and
 * releases sk's state lock internally; the lock is dropped only
 * around the actual sleep.  Returns the remaining timeout.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);

		/* Re-check all wakeup conditions under the state lock. */
		if (!skb_queue_empty(&sk->sk_receive_queue) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		unix_state_unlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_lock(sk);
		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk->sk_sleep, &wait);
	unix_state_unlock(sk);
	return timeo;
}
1758
1759
1760
static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	int copied = 0;
	int check_creds = 0;	/* set once creds are captured from the 1st skb */
	int target;		/* min. byte count before we may return */
	int err = 0;
	long timeo;

	/* Stream reads only make sense on a connected socket. */
	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	/* AF_UNIX streams have no out-of-band data. */
	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);

	msg->msg_namelen = 0;

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	mutex_lock(&u->readlock);

	do {
		int chunk;
		struct sk_buff *skb;

		unix_state_lock(sk);
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (skb == NULL) {
			/* Queue empty: done if enough was already copied. */
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			err = -EAGAIN;
			if (!timeo)
				break;
			/* Drop readlock while sleeping so the writer side
			 * can make progress; retake it before looping.
			 */
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				goto out;
			}
			mutex_lock(&u->readlock);
			continue;
 unlock:
			unix_state_unlock(sk);
			break;
		}
		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
				   sizeof(siocb->scm->creds)) != 0) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}
		} else {
			/* Copy credentials */
			siocb->scm->creds = *UNIXCREDS(skb);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr) {
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			/* Copy to user faulted: requeue the skb, and report
			 * EFAULT only when nothing was copied yet.
			 */
			skb_queue_head(&sk->sk_receive_queue, skb);
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			/* put the skb back if we didn't use it up.. */
			if (skb->len) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}

			kfree_skb(skb);

			/* Stop after an skb that carried fds so the
			 * SCM_RIGHTS ancillary data maps to it alone.
			 */
			if (siocb->scm->fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			/* put message back and return */
			skb_queue_head(&sk->sk_receive_queue, skb);
			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	return copied ? : err;
}
1903
1904static int unix_shutdown(struct socket *sock, int mode)
1905{
1906        struct sock *sk = sock->sk;
1907        struct sock *other;
1908
1909        mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1910
1911        if (mode) {
1912                unix_state_lock(sk);
1913                sk->sk_shutdown |= mode;
1914                other = unix_peer(sk);
1915                if (other)
1916                        sock_hold(other);
1917                unix_state_unlock(sk);
1918                sk->sk_state_change(sk);
1919
1920                if (other &&
1921                        (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1922
1923                        int peer_mode = 0;
1924
1925                        if (mode&RCV_SHUTDOWN)
1926                                peer_mode |= SEND_SHUTDOWN;
1927                        if (mode&SEND_SHUTDOWN)
1928                                peer_mode |= RCV_SHUTDOWN;
1929                        unix_state_lock(other);
1930                        other->sk_shutdown |= peer_mode;
1931                        unix_state_unlock(other);
1932                        other->sk_state_change(other);
1933                        read_lock(&other->sk_callback_lock);
1934                        if (peer_mode == SHUTDOWN_MASK)
1935                                sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1936                        else if (peer_mode & RCV_SHUTDOWN)
1937                                sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1938                        read_unlock(&other->sk_callback_lock);
1939                }
1940                if (other)
1941                        sock_put(other);
1942        }
1943        return 0;
1944}
1945
1946static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1947{
1948        struct sock *sk = sock->sk;
1949        long amount = 0;
1950        int err;
1951
1952        switch (cmd) {
1953        case SIOCOUTQ:
1954                amount = sk_wmem_alloc_get(sk);
1955                err = put_user(amount, (int __user *)arg);
1956                break;
1957        case SIOCINQ:
1958                {
1959                        struct sk_buff *skb;
1960
1961                        if (sk->sk_state == TCP_LISTEN) {
1962                                err = -EINVAL;
1963                                break;
1964                        }
1965
1966                        spin_lock(&sk->sk_receive_queue.lock);
1967                        if (sk->sk_type == SOCK_STREAM ||
1968                            sk->sk_type == SOCK_SEQPACKET) {
1969                                skb_queue_walk(&sk->sk_receive_queue, skb)
1970                                        amount += skb->len;
1971                        } else {
1972                                skb = skb_peek(&sk->sk_receive_queue);
1973                                if (skb)
1974                                        amount = skb->len;
1975                        }
1976                        spin_unlock(&sk->sk_receive_queue.lock);
1977                        err = put_user(amount, (int __user *)arg);
1978                        break;
1979                }
1980
1981        default:
1982                err = -ENOIOCTLCMD;
1983                break;
1984        }
1985        return err;
1986}
1987
1988static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1989{
1990        struct sock *sk = sock->sk;
1991        unsigned int mask;
1992
1993        sock_poll_wait(file, sk->sk_sleep, wait);
1994        mask = 0;
1995
1996        /* exceptional events? */
1997        if (sk->sk_err)
1998                mask |= POLLERR;
1999        if (sk->sk_shutdown == SHUTDOWN_MASK)
2000                mask |= POLLHUP;
2001        if (sk->sk_shutdown & RCV_SHUTDOWN)
2002                mask |= POLLRDHUP;
2003
2004        /* readable? */
2005        if (!skb_queue_empty(&sk->sk_receive_queue) ||
2006            (sk->sk_shutdown & RCV_SHUTDOWN))
2007                mask |= POLLIN | POLLRDNORM;
2008
2009        /* Connection-based need to check for termination and startup */
2010        if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2011            sk->sk_state == TCP_CLOSE)
2012                mask |= POLLHUP;
2013
2014        /*
2015         * we set writable also when the other side has shut down the
2016         * connection. This prevents stuck sockets.
2017         */
2018        if (unix_writable(sk))
2019                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2020
2021        return mask;
2022}
2023
static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	sock_poll_wait(file, sk->sk_sleep, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue) ||
	    (sk->sk_shutdown & RCV_SHUTDOWN))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	writable = unix_writable(sk);
	if (writable) {
		other = unix_peer_get(sk);
		if (other) {
			/* Peer not connected back to us: also wait on the
			 * peer's queue and report not-writable while its
			 * receive queue is full.
			 */
			if (unix_peer(other) != sk) {
				sock_poll_wait(file, &unix_sk(other)->peer_wait,
					  wait);
				if (unix_recvq_full(other))
					writable = 0;
			}

			sock_put(other);
		}
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		/* Arm async space notification for when room frees up. */
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}
2078
2079#ifdef CONFIG_PROC_FS
2080static struct sock *first_unix_socket(int *i)
2081{
2082        for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2083                if (!hlist_empty(&unix_socket_table[*i]))
2084                        return __sk_head(&unix_socket_table[*i]);
2085        }
2086        return NULL;
2087}
2088
2089static struct sock *next_unix_socket(int *i, struct sock *s)
2090{
2091        struct sock *next = sk_next(s);
2092        /* More in this chain? */
2093        if (next)
2094                return next;
2095        /* Look for next non-empty chain. */
2096        for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2097                if (!hlist_empty(&unix_socket_table[*i]))
2098                        return __sk_head(&unix_socket_table[*i]);
2099        }
2100        return NULL;
2101}
2102
/* Per-reader iterator state for the /proc/net/unix seq_file: the
 * per-net seq_file bookkeeping plus the current hash-chain index.
 */
struct unix_iter_state {
	struct seq_net_private p;
	int i;		/* current index into unix_socket_table */
};
2107
2108static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2109{
2110        struct unix_iter_state *iter = seq->private;
2111        loff_t off = 0;
2112        struct sock *s;
2113
2114        for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2115                if (sock_net(s) != seq_file_net(seq))
2116                        continue;
2117                if (off == pos)
2118                        return s;
2119                ++off;
2120        }
2121        return NULL;
2122}
2123
2124static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2125        __acquires(unix_table_lock)
2126{
2127        spin_lock(&unix_table_lock);
2128        return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2129}
2130
2131static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2132{
2133        struct unix_iter_state *iter = seq->private;
2134        struct sock *sk = v;
2135        ++*pos;
2136
2137        if (v == SEQ_START_TOKEN)
2138                sk = first_unix_socket(&iter->i);
2139        else
2140                sk = next_unix_socket(&iter->i, sk);
2141        while (sk && (sock_net(sk) != seq_file_net(seq)))
2142                sk = next_unix_socket(&iter->i, sk);
2143        return sk;
2144}
2145
/* seq_file .stop: release the lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2151
/* seq_file .show: emit one /proc/net/unix line (or the header). */
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		/* Protocol column is always 0 for AF_UNIX; the state column
		 * depends on whether an owning struct socket still exists.
		 */
		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
			s,
			atomic_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			/* addr->len includes the sun_family field. */
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;	/* presumably drops a trailing NUL for
					 * filesystem names — confirm against
					 * unix_mkname */
			else {
				/* Abstract names begin with a NUL byte;
				 * print '@' in its place, per convention.
				 */
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i]);
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
2195
/* seq_file callbacks backing /proc/net/unix. */
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
2202
/* open() handler for /proc/net/unix: per-netns seq_file with a
 * struct unix_iter_state as private data.
 */
static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct unix_iter_state));
}
2208
/* File operations for the /proc/net/unix entry. */
static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2216
2217#endif
2218
/* Registration record tying PF_UNIX socket creation to unix_create(). */
static struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
2224
2225
2226static int unix_net_init(struct net *net)
2227{
2228        int error = -ENOMEM;
2229
2230        net->unx.sysctl_max_dgram_qlen = 10;
2231        if (unix_sysctl_register(net))
2232                goto out;
2233
2234#ifdef CONFIG_PROC_FS
2235        if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2236                unix_sysctl_unregister(net);
2237                goto out;
2238        }
2239#endif
2240        error = 0;
2241out:
2242        return error;
2243}
2244
/* Per-namespace teardown: remove the sysctl table and proc entry. */
static void unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}
2250
/* Per-network-namespace init/exit hooks for AF_UNIX. */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2255
2256static int __init af_unix_init(void)
2257{
2258        int rc = -1;
2259        struct sk_buff *dummy_skb;
2260
2261        BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2262
2263        rc = proto_register(&unix_proto, 1);
2264        if (rc != 0) {
2265                printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2266                       __func__);
2267                goto out;
2268        }
2269
2270        sock_register(&unix_family_ops);
2271        register_pernet_subsys(&unix_net_ops);
2272out:
2273        return rc;
2274}
2275
/* Module exit: undo the registrations made in af_unix_init(). */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2282
/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);

2292