linux/net/unix/af_unix.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * NET4:        Implementation of BSD Unix domain sockets.
   4 *
   5 * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   6 *
   7 * Fixes:
   8 *              Linus Torvalds  :       Assorted bug cures.
   9 *              Niibe Yutaka    :       async I/O support.
  10 *              Carsten Paeth   :       PF_UNIX check, address fixes.
  11 *              Alan Cox        :       Limit size of allocated blocks.
  12 *              Alan Cox        :       Fixed the stupid socketpair bug.
  13 *              Alan Cox        :       BSD compatibility fine tuning.
  14 *              Alan Cox        :       Fixed a bug in connect when interrupted.
  15 *              Alan Cox        :       Sorted out a proper draft version of
  16 *                                      file descriptor passing hacked up from
  17 *                                      Mike Shaver's work.
  18 *              Marty Leisner   :       Fixes to fd passing
  19 *              Nick Nevin      :       recvmsg bugfix.
  20 *              Alan Cox        :       Started proper garbage collector
  21 *              Heiko EiBfeldt  :       Missing verify_area check
  22 *              Alan Cox        :       Started POSIXisms
  23 *              Andreas Schwab  :       Replace inode by dentry for proper
  24 *                                      reference counting
  25 *              Kirk Petersen   :       Made this a module
  26 *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  27 *                                      Lots of bug fixes.
 *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
 *                                      by above two patches.
 *           Andrea Arcangeli   :       If possible we block in connect(2)
 *                                      if the max backlog of the listen socket
 *                                      has been reached. This won't break
 *                                      old apps and it will avoid a huge number
 *                                      of socks hashed (this is for unix_gc()
 *                                      performance reasons).
  36 *                                      Security fix that limits the max
  37 *                                      number of socks to 2*max_files and
  38 *                                      the number of skb queueable in the
  39 *                                      dgram receiver.
  40 *              Artur Skawina   :       Hash function optimizations
  41 *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  42 *            Malcolm Beattie   :       Set peercred for socketpair
  43 *           Michal Ostrowski   :       Module initialization cleanup.
  44 *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  45 *                                      the core infrastructure is doing that
  46 *                                      for all net proto families now (2.5.69+)
  47 *
  48 * Known differences from reference BSD that was tested:
  49 *
  50 *      [TO FIX]
  51 *      ECONNREFUSED is not returned from one end of a connected() socket to the
  52 *              other the moment one end closes.
  53 *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  54 *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  55 *      [NOT TO FIX]
  56 *      accept() returns a path name even if the connecting socket has closed
  57 *              in the meantime (BSD loses the path and gives up).
  58 *      accept() returns 0 length path for an unbound connector. BSD returns 16
  59 *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  60 *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  61 *      BSD af_unix apparently has connect forgetting to block properly.
  62 *              (need to check this with the POSIX spec in detail)
  63 *
  64 * Differences from 2.0.0-11-... (ANK)
  65 *      Bug fixes and improvements.
  66 *              - client shutdown killed server socket.
  67 *              - removed all useless cli/sti pairs.
  68 *
  69 *      Semantic changes/extensions.
  70 *              - generic control message passing.
  71 *              - SCM_CREDENTIALS control message.
  72 *              - "Abstract" (not FS based) socket bindings.
  73 *                Abstract names are sequences of bytes (not zero terminated)
  74 *                started by 0, so that this name space does not intersect
  75 *                with BSD names.
  76 */
  77
  78#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  79
  80#include <linux/module.h>
  81#include <linux/kernel.h>
  82#include <linux/signal.h>
  83#include <linux/sched/signal.h>
  84#include <linux/errno.h>
  85#include <linux/string.h>
  86#include <linux/stat.h>
  87#include <linux/dcache.h>
  88#include <linux/namei.h>
  89#include <linux/socket.h>
  90#include <linux/un.h>
  91#include <linux/fcntl.h>
  92#include <linux/termios.h>
  93#include <linux/sockios.h>
  94#include <linux/net.h>
  95#include <linux/in.h>
  96#include <linux/fs.h>
  97#include <linux/slab.h>
  98#include <linux/uaccess.h>
  99#include <linux/skbuff.h>
 100#include <linux/netdevice.h>
 101#include <net/net_namespace.h>
 102#include <net/sock.h>
 103#include <net/tcp_states.h>
 104#include <net/af_unix.h>
 105#include <linux/proc_fs.h>
 106#include <linux/seq_file.h>
 107#include <net/scm.h>
 108#include <linux/init.h>
 109#include <linux/poll.h>
 110#include <linux/rtnetlink.h>
 111#include <linux/mount.h>
 112#include <net/checksum.h>
 113#include <linux/security.h>
 114#include <linux/freezer.h>
 115#include <linux/file.h>
 116
 117#include "scm.h"
 118
 119struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 120EXPORT_SYMBOL_GPL(unix_socket_table);
 121DEFINE_SPINLOCK(unix_table_lock);
 122EXPORT_SYMBOL_GPL(unix_table_lock);
 123static atomic_long_t unix_nr_socks;
 124
 125
 126static struct hlist_head *unix_sockets_unbound(void *addr)
 127{
 128        unsigned long hash = (unsigned long)addr;
 129
 130        hash ^= hash >> 16;
 131        hash ^= hash >> 8;
 132        hash %= UNIX_HASH_SIZE;
 133        return &unix_socket_table[UNIX_HASH_SIZE + hash];
 134}
 135
 136#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 137
 138#ifdef CONFIG_SECURITY_NETWORK
 139static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 140{
 141        UNIXCB(skb).secid = scm->secid;
 142}
 143
 144static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 145{
 146        scm->secid = UNIXCB(skb).secid;
 147}
 148
 149static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 150{
 151        return (scm->secid == UNIXCB(skb).secid);
 152}
 153#else
 154static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 155{ }
 156
 157static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 158{ }
 159
 160static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 161{
 162        return true;
 163}
 164#endif /* CONFIG_SECURITY_NETWORK */
 165
 166/*
 167 *  SMP locking strategy:
 168 *    hash table is protected with spinlock unix_table_lock
 169 *    each socket state is protected by separate spin lock.
 170 */
 171
 172static inline unsigned int unix_hash_fold(__wsum n)
 173{
 174        unsigned int hash = (__force unsigned int)csum_fold(n);
 175
 176        hash ^= hash>>8;
 177        return hash&(UNIX_HASH_SIZE-1);
 178}
 179
 180#define unix_peer(sk) (unix_sk(sk)->peer)
 181
 182static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 183{
 184        return unix_peer(osk) == sk;
 185}
 186
 187static inline int unix_may_send(struct sock *sk, struct sock *osk)
 188{
 189        return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 190}
 191
 192static inline int unix_recvq_full(struct sock const *sk)
 193{
 194        return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 195}
 196
 197struct sock *unix_peer_get(struct sock *s)
 198{
 199        struct sock *peer;
 200
 201        unix_state_lock(s);
 202        peer = unix_peer(s);
 203        if (peer)
 204                sock_hold(peer);
 205        unix_state_unlock(s);
 206        return peer;
 207}
 208EXPORT_SYMBOL_GPL(unix_peer_get);
 209
 210static inline void unix_release_addr(struct unix_address *addr)
 211{
 212        if (refcount_dec_and_test(&addr->refcnt))
 213                kfree(addr);
 214}
 215
 216/*
 217 *      Check unix socket name:
 218 *              - should be not zero length.
 219 *              - if started by not zero, should be NULL terminated (FS object)
 220 *              - if started by zero, it is abstract name.
 221 */
 222
 223static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 224{
 225        *hashp = 0;
 226
 227        if (len <= sizeof(short) || len > sizeof(*sunaddr))
 228                return -EINVAL;
 229        if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 230                return -EINVAL;
 231        if (sunaddr->sun_path[0]) {
 232                /*
 233                 * This may look like an off by one error but it is a bit more
 234                 * subtle. 108 is the longest valid AF_UNIX path for a binding.
 235                 * sun_path[108] doesn't as such exist.  However in kernel space
 236                 * we are guaranteed that it is a valid memory location in our
 237                 * kernel address buffer.
 238                 */
 239                ((char *)sunaddr)[len] = 0;
 240                len = strlen(sunaddr->sun_path)+1+sizeof(short);
 241                return len;
 242        }
 243
 244        *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 245        return len;
 246}
 247
 248static void __unix_remove_socket(struct sock *sk)
 249{
 250        sk_del_node_init(sk);
 251}
 252
 253static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 254{
 255        WARN_ON(!sk_unhashed(sk));
 256        sk_add_node(sk, list);
 257}
 258
 259static inline void unix_remove_socket(struct sock *sk)
 260{
 261        spin_lock(&unix_table_lock);
 262        __unix_remove_socket(sk);
 263        spin_unlock(&unix_table_lock);
 264}
 265
 266static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 267{
 268        spin_lock(&unix_table_lock);
 269        __unix_insert_socket(list, sk);
 270        spin_unlock(&unix_table_lock);
 271}
 272
 273static struct sock *__unix_find_socket_byname(struct net *net,
 274                                              struct sockaddr_un *sunname,
 275                                              int len, int type, unsigned int hash)
 276{
 277        struct sock *s;
 278
 279        sk_for_each(s, &unix_socket_table[hash ^ type]) {
 280                struct unix_sock *u = unix_sk(s);
 281
 282                if (!net_eq(sock_net(s), net))
 283                        continue;
 284
 285                if (u->addr->len == len &&
 286                    !memcmp(u->addr->name, sunname, len))
 287                        goto found;
 288        }
 289        s = NULL;
 290found:
 291        return s;
 292}
 293
 294static inline struct sock *unix_find_socket_byname(struct net *net,
 295                                                   struct sockaddr_un *sunname,
 296                                                   int len, int type,
 297                                                   unsigned int hash)
 298{
 299        struct sock *s;
 300
 301        spin_lock(&unix_table_lock);
 302        s = __unix_find_socket_byname(net, sunname, len, type, hash);
 303        if (s)
 304                sock_hold(s);
 305        spin_unlock(&unix_table_lock);
 306        return s;
 307}
 308
 309static struct sock *unix_find_socket_byinode(struct inode *i)
 310{
 311        struct sock *s;
 312
 313        spin_lock(&unix_table_lock);
 314        sk_for_each(s,
 315                    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 316                struct dentry *dentry = unix_sk(s)->path.dentry;
 317
 318                if (dentry && d_backing_inode(dentry) == i) {
 319                        sock_hold(s);
 320                        goto found;
 321                }
 322        }
 323        s = NULL;
 324found:
 325        spin_unlock(&unix_table_lock);
 326        return s;
 327}
 328
 329/* Support code for asymmetrically connected dgram sockets
 330 *
 331 * If a datagram socket is connected to a socket not itself connected
 332 * to the first socket (eg, /dev/log), clients may only enqueue more
 333 * messages if the present receive queue of the server socket is not
 334 * "too large". This means there's a second writeability condition
 335 * poll and sendmsg need to test. The dgram recv code will do a wake
 336 * up on the peer_wait wait queue of a socket upon reception of a
 337 * datagram which needs to be propagated to sleeping would-be writers
 338 * since these might not have sent anything so far. This can't be
 339 * accomplished via poll_wait because the lifetime of the server
 340 * socket might be less than that of its clients if these break their
 341 * association with it or if the server socket is closed while clients
 342 * are still connected to it and there's no way to inform "a polling
 343 * implementation" that it should let go of a certain wait queue
 344 *
 345 * In order to propagate a wake up, a wait_queue_entry_t of the client
 346 * socket is enqueued on the peer_wait queue of the server socket
 347 * whose wake function does a wake_up on the ordinary client socket
 348 * wait queue. This connection is established whenever a write (or
 349 * poll for write) hit the flow control condition and broken when the
 350 * association to the server socket is dissolved or after a wake up
 351 * was relayed.
 352 */
 353
 354static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 355                                      void *key)
 356{
 357        struct unix_sock *u;
 358        wait_queue_head_t *u_sleep;
 359
 360        u = container_of(q, struct unix_sock, peer_wake);
 361
 362        __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 363                            q);
 364        u->peer_wake.private = NULL;
 365
 366        /* relaying can only happen while the wq still exists */
 367        u_sleep = sk_sleep(&u->sk);
 368        if (u_sleep)
 369                wake_up_interruptible_poll(u_sleep, key_to_poll(key));
 370
 371        return 0;
 372}
 373
 374static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 375{
 376        struct unix_sock *u, *u_other;
 377        int rc;
 378
 379        u = unix_sk(sk);
 380        u_other = unix_sk(other);
 381        rc = 0;
 382        spin_lock(&u_other->peer_wait.lock);
 383
 384        if (!u->peer_wake.private) {
 385                u->peer_wake.private = other;
 386                __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 387
 388                rc = 1;
 389        }
 390
 391        spin_unlock(&u_other->peer_wait.lock);
 392        return rc;
 393}
 394
 395static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 396                                            struct sock *other)
 397{
 398        struct unix_sock *u, *u_other;
 399
 400        u = unix_sk(sk);
 401        u_other = unix_sk(other);
 402        spin_lock(&u_other->peer_wait.lock);
 403
 404        if (u->peer_wake.private == other) {
 405                __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 406                u->peer_wake.private = NULL;
 407        }
 408
 409        spin_unlock(&u_other->peer_wait.lock);
 410}
 411
 412static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 413                                                   struct sock *other)
 414{
 415        unix_dgram_peer_wake_disconnect(sk, other);
 416        wake_up_interruptible_poll(sk_sleep(sk),
 417                                   EPOLLOUT |
 418                                   EPOLLWRNORM |
 419                                   EPOLLWRBAND);
 420}
 421
 422/* preconditions:
 423 *      - unix_peer(sk) == other
 424 *      - association is stable
 425 */
 426static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 427{
 428        int connected;
 429
 430        connected = unix_dgram_peer_wake_connect(sk, other);
 431
 432        /* If other is SOCK_DEAD, we want to make sure we signal
 433         * POLLOUT, such that a subsequent write() can get a
 434         * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
 435         * to other and its full, we will hang waiting for POLLOUT.
 436         */
 437        if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
 438                return 1;
 439
 440        if (connected)
 441                unix_dgram_peer_wake_disconnect(sk, other);
 442
 443        return 0;
 444}
 445
 446static int unix_writable(const struct sock *sk)
 447{
 448        return sk->sk_state != TCP_LISTEN &&
 449               (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 450}
 451
 452static void unix_write_space(struct sock *sk)
 453{
 454        struct socket_wq *wq;
 455
 456        rcu_read_lock();
 457        if (unix_writable(sk)) {
 458                wq = rcu_dereference(sk->sk_wq);
 459                if (skwq_has_sleeper(wq))
 460                        wake_up_interruptible_sync_poll(&wq->wait,
 461                                EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
 462                sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 463        }
 464        rcu_read_unlock();
 465}
 466
 467/* When dgram socket disconnects (or changes its peer), we clear its receive
 468 * queue of packets arrived from previous peer. First, it allows to do
 469 * flow control based only on wmem_alloc; second, sk connected to peer
 470 * may receive messages only from that peer. */
 471static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 472{
 473        if (!skb_queue_empty(&sk->sk_receive_queue)) {
 474                skb_queue_purge(&sk->sk_receive_queue);
 475                wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 476
 477                /* If one link of bidirectional dgram pipe is disconnected,
 478                 * we signal error. Messages are lost. Do not make this,
 479                 * when peer was not connected to us.
 480                 */
 481                if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 482                        other->sk_err = ECONNRESET;
 483                        other->sk_error_report(other);
 484                }
 485        }
 486}
 487
 488static void unix_sock_destructor(struct sock *sk)
 489{
 490        struct unix_sock *u = unix_sk(sk);
 491
 492        skb_queue_purge(&sk->sk_receive_queue);
 493
 494        WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 495        WARN_ON(!sk_unhashed(sk));
 496        WARN_ON(sk->sk_socket);
 497        if (!sock_flag(sk, SOCK_DEAD)) {
 498                pr_info("Attempt to release alive unix socket: %p\n", sk);
 499                return;
 500        }
 501
 502        if (u->addr)
 503                unix_release_addr(u->addr);
 504
 505        atomic_long_dec(&unix_nr_socks);
 506        local_bh_disable();
 507        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 508        local_bh_enable();
 509#ifdef UNIX_REFCNT_DEBUG
 510        pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 511                atomic_long_read(&unix_nr_socks));
 512#endif
 513}
 514
 515static void unix_release_sock(struct sock *sk, int embrion)
 516{
 517        struct unix_sock *u = unix_sk(sk);
 518        struct path path;
 519        struct sock *skpair;
 520        struct sk_buff *skb;
 521        int state;
 522
 523        unix_remove_socket(sk);
 524
 525        /* Clear state */
 526        unix_state_lock(sk);
 527        sock_orphan(sk);
 528        sk->sk_shutdown = SHUTDOWN_MASK;
 529        path         = u->path;
 530        u->path.dentry = NULL;
 531        u->path.mnt = NULL;
 532        state = sk->sk_state;
 533        sk->sk_state = TCP_CLOSE;
 534        unix_state_unlock(sk);
 535
 536        wake_up_interruptible_all(&u->peer_wait);
 537
 538        skpair = unix_peer(sk);
 539
 540        if (skpair != NULL) {
 541                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 542                        unix_state_lock(skpair);
 543                        /* No more writes */
 544                        skpair->sk_shutdown = SHUTDOWN_MASK;
 545                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 546                                skpair->sk_err = ECONNRESET;
 547                        unix_state_unlock(skpair);
 548                        skpair->sk_state_change(skpair);
 549                        sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 550                }
 551
 552                unix_dgram_peer_wake_disconnect(sk, skpair);
 553                sock_put(skpair); /* It may now die */
 554                unix_peer(sk) = NULL;
 555        }
 556
 557        /* Try to flush out this socket. Throw out buffers at least */
 558
 559        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 560                if (state == TCP_LISTEN)
 561                        unix_release_sock(skb->sk, 1);
 562                /* passed fds are erased in the kfree_skb hook        */
 563                UNIXCB(skb).consumed = skb->len;
 564                kfree_skb(skb);
 565        }
 566
 567        if (path.dentry)
 568                path_put(&path);
 569
 570        sock_put(sk);
 571
 572        /* ---- Socket is dead now and most probably destroyed ---- */
 573
 574        /*
 575         * Fixme: BSD difference: In BSD all sockets connected to us get
 576         *        ECONNRESET and we die on the spot. In Linux we behave
 577         *        like files and pipes do and wait for the last
 578         *        dereference.
 579         *
 580         * Can't we simply set sock->err?
 581         *
 582         *        What the above comment does talk about? --ANK(980817)
 583         */
 584
 585        if (unix_tot_inflight)
 586                unix_gc();              /* Garbage collect fds */
 587}
 588
 589static void init_peercred(struct sock *sk)
 590{
 591        put_pid(sk->sk_peer_pid);
 592        if (sk->sk_peer_cred)
 593                put_cred(sk->sk_peer_cred);
 594        sk->sk_peer_pid  = get_pid(task_tgid(current));
 595        sk->sk_peer_cred = get_current_cred();
 596}
 597
 598static void copy_peercred(struct sock *sk, struct sock *peersk)
 599{
 600        put_pid(sk->sk_peer_pid);
 601        if (sk->sk_peer_cred)
 602                put_cred(sk->sk_peer_cred);
 603        sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 604        sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 605}
 606
 607static int unix_listen(struct socket *sock, int backlog)
 608{
 609        int err;
 610        struct sock *sk = sock->sk;
 611        struct unix_sock *u = unix_sk(sk);
 612        struct pid *old_pid = NULL;
 613
 614        err = -EOPNOTSUPP;
 615        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 616                goto out;       /* Only stream/seqpacket sockets accept */
 617        err = -EINVAL;
 618        if (!u->addr)
 619                goto out;       /* No listens on an unbound socket */
 620        unix_state_lock(sk);
 621        if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 622                goto out_unlock;
 623        if (backlog > sk->sk_max_ack_backlog)
 624                wake_up_interruptible_all(&u->peer_wait);
 625        sk->sk_max_ack_backlog  = backlog;
 626        sk->sk_state            = TCP_LISTEN;
 627        /* set credentials so connect can copy them */
 628        init_peercred(sk);
 629        err = 0;
 630
 631out_unlock:
 632        unix_state_unlock(sk);
 633        put_pid(old_pid);
 634out:
 635        return err;
 636}
 637
 638static int unix_release(struct socket *);
 639static int unix_bind(struct socket *, struct sockaddr *, int);
 640static int unix_stream_connect(struct socket *, struct sockaddr *,
 641                               int addr_len, int flags);
 642static int unix_socketpair(struct socket *, struct socket *);
 643static int unix_accept(struct socket *, struct socket *, int, bool);
 644static int unix_getname(struct socket *, struct sockaddr *, int);
 645static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
 646static __poll_t unix_dgram_poll(struct file *, struct socket *,
 647                                    poll_table *);
 648static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 649static int unix_shutdown(struct socket *, int);
 650static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
 651static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
 652static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
 653                                    size_t size, int flags);
 654static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 655                                       struct pipe_inode_info *, size_t size,
 656                                       unsigned int flags);
 657static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 658static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 659static int unix_dgram_connect(struct socket *, struct sockaddr *,
 660                              int, int);
 661static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 662static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 663                                  int);
 664
 665static int unix_set_peek_off(struct sock *sk, int val)
 666{
 667        struct unix_sock *u = unix_sk(sk);
 668
 669        if (mutex_lock_interruptible(&u->iolock))
 670                return -EINTR;
 671
 672        sk->sk_peek_off = val;
 673        mutex_unlock(&u->iolock);
 674
 675        return 0;
 676}
 677
 678
 679static const struct proto_ops unix_stream_ops = {
 680        .family =       PF_UNIX,
 681        .owner =        THIS_MODULE,
 682        .release =      unix_release,
 683        .bind =         unix_bind,
 684        .connect =      unix_stream_connect,
 685        .socketpair =   unix_socketpair,
 686        .accept =       unix_accept,
 687        .getname =      unix_getname,
 688        .poll =         unix_poll,
 689        .ioctl =        unix_ioctl,
 690        .listen =       unix_listen,
 691        .shutdown =     unix_shutdown,
 692        .setsockopt =   sock_no_setsockopt,
 693        .getsockopt =   sock_no_getsockopt,
 694        .sendmsg =      unix_stream_sendmsg,
 695        .recvmsg =      unix_stream_recvmsg,
 696        .mmap =         sock_no_mmap,
 697        .sendpage =     unix_stream_sendpage,
 698        .splice_read =  unix_stream_splice_read,
 699        .set_peek_off = unix_set_peek_off,
 700};
 701
 702static const struct proto_ops unix_dgram_ops = {
 703        .family =       PF_UNIX,
 704        .owner =        THIS_MODULE,
 705        .release =      unix_release,
 706        .bind =         unix_bind,
 707        .connect =      unix_dgram_connect,
 708        .socketpair =   unix_socketpair,
 709        .accept =       sock_no_accept,
 710        .getname =      unix_getname,
 711        .poll =         unix_dgram_poll,
 712        .ioctl =        unix_ioctl,
 713        .listen =       sock_no_listen,
 714        .shutdown =     unix_shutdown,
 715        .setsockopt =   sock_no_setsockopt,
 716        .getsockopt =   sock_no_getsockopt,
 717        .sendmsg =      unix_dgram_sendmsg,
 718        .recvmsg =      unix_dgram_recvmsg,
 719        .mmap =         sock_no_mmap,
 720        .sendpage =     sock_no_sendpage,
 721        .set_peek_off = unix_set_peek_off,
 722};
 723
 724static const struct proto_ops unix_seqpacket_ops = {
 725        .family =       PF_UNIX,
 726        .owner =        THIS_MODULE,
 727        .release =      unix_release,
 728        .bind =         unix_bind,
 729        .connect =      unix_stream_connect,
 730        .socketpair =   unix_socketpair,
 731        .accept =       unix_accept,
 732        .getname =      unix_getname,
 733        .poll =         unix_dgram_poll,
 734        .ioctl =        unix_ioctl,
 735        .listen =       unix_listen,
 736        .shutdown =     unix_shutdown,
 737        .setsockopt =   sock_no_setsockopt,
 738        .getsockopt =   sock_no_getsockopt,
 739        .sendmsg =      unix_seqpacket_sendmsg,
 740        .recvmsg =      unix_seqpacket_recvmsg,
 741        .mmap =         sock_no_mmap,
 742        .sendpage =     sock_no_sendpage,
 743        .set_peek_off = unix_set_peek_off,
 744};
 745
 746static struct proto unix_proto = {
 747        .name                   = "UNIX",
 748        .owner                  = THIS_MODULE,
 749        .obj_size               = sizeof(struct unix_sock),
 750};
 751
 752static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 753{
 754        struct sock *sk = NULL;
 755        struct unix_sock *u;
 756
 757        atomic_long_inc(&unix_nr_socks);
 758        if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 759                goto out;
 760
 761        sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 762        if (!sk)
 763                goto out;
 764
 765        sock_init_data(sock, sk);
 766
 767        sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 768        sk->sk_write_space      = unix_write_space;
 769        sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 770        sk->sk_destruct         = unix_sock_destructor;
 771        u         = unix_sk(sk);
 772        u->path.dentry = NULL;
 773        u->path.mnt = NULL;
 774        spin_lock_init(&u->lock);
 775        atomic_long_set(&u->inflight, 0);
 776        INIT_LIST_HEAD(&u->link);
 777        mutex_init(&u->iolock); /* single task reading lock */
 778        mutex_init(&u->bindlock); /* single task binding lock */
 779        init_waitqueue_head(&u->peer_wait);
 780        init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 781        unix_insert_socket(unix_sockets_unbound(sk), sk);
 782out:
 783        if (sk == NULL)
 784                atomic_long_dec(&unix_nr_socks);
 785        else {
 786                local_bh_disable();
 787                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 788                local_bh_enable();
 789        }
 790        return sk;
 791}
 792
 793static int unix_create(struct net *net, struct socket *sock, int protocol,
 794                       int kern)
 795{
 796        if (protocol && protocol != PF_UNIX)
 797                return -EPROTONOSUPPORT;
 798
 799        sock->state = SS_UNCONNECTED;
 800
 801        switch (sock->type) {
 802        case SOCK_STREAM:
 803                sock->ops = &unix_stream_ops;
 804                break;
 805                /*
 806                 *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 807                 *      nothing uses it.
 808                 */
 809        case SOCK_RAW:
 810                sock->type = SOCK_DGRAM;
 811                /* fall through */
 812        case SOCK_DGRAM:
 813                sock->ops = &unix_dgram_ops;
 814                break;
 815        case SOCK_SEQPACKET:
 816                sock->ops = &unix_seqpacket_ops;
 817                break;
 818        default:
 819                return -ESOCKTNOSUPPORT;
 820        }
 821
 822        return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 823}
 824
 825static int unix_release(struct socket *sock)
 826{
 827        struct sock *sk = sock->sk;
 828
 829        if (!sk)
 830                return 0;
 831
 832        unix_release_sock(sk, 0);
 833        sock->sk = NULL;
 834
 835        return 0;
 836}
 837
 /*
  * Give @sock an automatically chosen name if it has no address yet.
  *
  * The candidate name is built in a zeroed buffer: sun_path[0] stays 0 and
  * five hex digits from the static counter ordernum are printed at
  * sun_path+1, so candidates range over 0x00000..0xFFFFF.  Each candidate
  * is probed with __unix_find_socket_byname() under unix_table_lock until
  * an unused one is found; the socket is then rehashed under that name.
  *
  * Returns 0 on success or when the socket already has an address, the
  * error from mutex_lock_interruptible(), -ENOMEM if the address cannot
  * be allocated, or -ENOSPC after 0x100000 unsuccessful probes.
  */
 838static int unix_autobind(struct socket *sock)
 839{
 840        struct sock *sk = sock->sk;
 841        struct net *net = sock_net(sk);
 842        struct unix_sock *u = unix_sk(sk);
 843        static u32 ordernum = 1;
 844        struct unix_address *addr;
 845        int err;
 846        unsigned int retries = 0;
 847
 848        err = mutex_lock_interruptible(&u->bindlock);
 849        if (err)
 850                return err;
 851
            /* Already bound: nothing to do, report success. */
 852        err = 0;
 853        if (u->addr)
 854                goto out;
 855
 856        err = -ENOMEM;
            /* Header plus sun_family (a short) plus 16 bytes of sun_path. */
 857        addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 858        if (!addr)
 859                goto out;
 860
 861        addr->name->sun_family = AF_UNIX;
 862        refcount_set(&addr->refcnt, 1);
 863
 864retry:
            /*
             * Length = hex digits written + the leading zero byte + sun_family.
             * NOTE(review): ordernum is read here before unix_table_lock is
             * taken, although it is updated under that lock below -- confirm
             * this is intentional.
             */
 865        addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 866        addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 867
 868        spin_lock(&unix_table_lock);
            /* Advance the counter for the next attempt, wrapping at 20 bits. */
 869        ordernum = (ordernum+1)&0xFFFFF;
 870
 871        if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 872                                      addr->hash)) {
 873                spin_unlock(&unix_table_lock);
 874                /*
 875                 * __unix_find_socket_byname() may take a long time if many names
 876                 * are already in use.
 877                 */
 878                cond_resched();
 879                /* Give up if all names seem to be in use. */
 880                if (retries++ == 0xFFFFF) {
 881                        err = -ENOSPC;
 882                        kfree(addr);
 883                        goto out;
 884                }
 885                goto retry;
 886        }
            /* Name is free: mix the socket type into the hash used as bucket index. */
 887        addr->hash ^= sk->sk_type;
 888
            /*
             * Move the socket to its new hash chain.  u->addr is published with
             * release semantics; unix_getname() reads it with smp_load_acquire().
             */
 889        __unix_remove_socket(sk);
 890        smp_store_release(&u->addr, addr);
 891        __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 892        spin_unlock(&unix_table_lock);
 893        err = 0;
 894
 895out:    mutex_unlock(&u->bindlock);
 896        return err;
 897}
 898
 /*
  * Look up the socket addressed by @sunname.
  *
  * When sun_path[0] is zero the name is searched with
  * unix_find_socket_byname().  Otherwise sun_path is resolved as a
  * filesystem path: the inode must grant MAY_WRITE, must be a socket
  * inode, and must map to a socket whose sk_type equals @type.
  *
  * Returns the socket found (the error paths release it with sock_put(),
  * so the caller is expected to do the same when done).  On failure NULL
  * is returned and *@error receives the negative errno; *@error is left
  * untouched on success.
  */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct inode *inode;
	struct sock *found;
	struct path path;
	int err;

	if (!sunname->sun_path[0]) {
		/* No leading path character: search by name. */
		found = unix_find_socket_byname(net, sunname, len, type, hash);
		if (!found) {
			err = -ECONNREFUSED;
			goto fail;
		}

		if (unix_sk(found)->path.dentry)
			touch_atime(&unix_sk(found)->path);

		return found;
	}

	/* Filesystem name: resolve the path and validate the inode. */
	err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	inode = d_backing_inode(path.dentry);
	err = inode_permission(inode, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(inode->i_mode))
		goto put_fail;

	found = unix_find_socket_byinode(inode);
	if (!found)
		goto put_fail;

	if (found->sk_type != type) {
		/* Wrong socket type: drop the path and the socket we found. */
		path_put(&path);
		sock_put(found);
		err = -EPROTOTYPE;
		goto fail;
	}

	touch_atime(&path);
	path_put(&path);
	return found;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
 953
 954static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 955{
 956        struct dentry *dentry;
 957        struct path path;
 958        int err = 0;
 959        /*
 960         * Get the parent directory, calculate the hash for last
 961         * component.
 962         */
 963        dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 964        err = PTR_ERR(dentry);
 965        if (IS_ERR(dentry))
 966                return err;
 967
 968        /*
 969         * All right, let's create it.
 970         */
 971        err = security_path_mknod(&path, dentry, mode, 0);
 972        if (!err) {
 973                err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 974                if (!err) {
 975                        res->mnt = mntget(path.mnt);
 976                        res->dentry = dget(dentry);
 977                }
 978        }
 979        done_path_create(&path, dentry);
 980        return err;
 981}
 982
 /*
  * Bind @sock to the address in @uaddr.
  *
  * - An address consisting of just the family field (addr_len ==
  *   sizeof(short)) is handed to unix_autobind().
  * - When sun_path[0] is non-zero a filesystem node is created with
  *   unix_mknod() and the socket is hashed by the node's inode number.
  * - Otherwise the name must not already be present according to
  *   __unix_find_socket_byname().
  *
  * Returns 0 on success, -EINVAL for a too-short address, a family other
  * than AF_UNIX or an already bound socket, -EADDRINUSE if the name or
  * path is taken, -ENOMEM, or the error from unix_mkname(), unix_mknod()
  * or mutex_lock_interruptible().
  */
 983static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 984{
 985        struct sock *sk = sock->sk;
 986        struct net *net = sock_net(sk);
 987        struct unix_sock *u = unix_sk(sk);
 988        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 989        char *sun_path = sunaddr->sun_path;
 990        int err;
 991        unsigned int hash;
 992        struct unix_address *addr;
 993        struct hlist_head *list;
 994        struct path path = { };
 995
 996        err = -EINVAL;
 997        if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
 998            sunaddr->sun_family != AF_UNIX)
 999                goto out;
1000
1001        if (addr_len == sizeof(short)) {
1002                err = unix_autobind(sock);
1003                goto out;
1004        }
1005
            /* On success unix_mkname() returns the length to use and fills in hash. */
1006        err = unix_mkname(sunaddr, addr_len, &hash);
1007        if (err < 0)
1008                goto out;
1009        addr_len = err;
1010
1011        if (sun_path[0]) {
                    /* Socket node; permission bits come from the socket inode minus the umask. */
1012                umode_t mode = S_IFSOCK |
1013                       (SOCK_INODE(sock)->i_mode & ~current_umask());
                    /*
                     * NOTE(review): the node is created before bindlock is taken
                     * and before the u->addr check.  If a later step fails, this
                     * function only calls path_put(); nothing here removes the
                     * node again -- confirm whether that is acceptable.
                     */
1014                err = unix_mknod(sun_path, mode, &path);
1015                if (err) {
1016                        if (err == -EEXIST)
1017                                err = -EADDRINUSE;
1018                        goto out;
1019                }
1020        }
1021
1022        err = mutex_lock_interruptible(&u->bindlock);
1023        if (err)
1024                goto out_put;
1025
            /* A socket that already has an address cannot be bound again. */
1026        err = -EINVAL;
1027        if (u->addr)
1028                goto out_up;
1029
1030        err = -ENOMEM;
1031        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1032        if (!addr)
1033                goto out_up;
1034
1035        memcpy(addr->name, sunaddr, addr_len);
1036        addr->len = addr_len;
1037        addr->hash = hash ^ sk->sk_type;
1038        refcount_set(&addr->refcnt, 1);
1039
1040        if (sun_path[0]) {
                    /*
                     * Filesystem socket: addr->hash is set to UNIX_HASH_SIZE and
                     * the bucket is chosen from the inode number instead.
                     */
1041                addr->hash = UNIX_HASH_SIZE;
1042                hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1043                spin_lock(&unix_table_lock);
1044                u->path = path;
1045                list = &unix_socket_table[hash];
1046        } else {
                    /* No path: the name must not be in use yet. */
1047                spin_lock(&unix_table_lock);
1048                err = -EADDRINUSE;
1049                if (__unix_find_socket_byname(net, sunaddr, addr_len,
1050                                              sk->sk_type, hash)) {
1051                        unix_release_addr(addr);
1052                        goto out_unlock;
1053                }
1054
1055                list = &unix_socket_table[addr->hash];
1056        }
1057
            /*
             * Rehash the socket under its new address.  u->addr is published
             * with release semantics; unix_getname() reads it with
             * smp_load_acquire().
             */
1058        err = 0;
1059        __unix_remove_socket(sk);
1060        smp_store_release(&u->addr, addr);
1061        __unix_insert_socket(list, sk);
1062
1063out_unlock:
1064        spin_unlock(&unix_table_lock);
1065out_up:
1066        mutex_unlock(&u->bindlock);
1067out_put:
            /* On success the path now belongs to u->path; drop it only on error. */
1068        if (err)
1069                path_put(&path);
1070out:
1071        return err;
1072}
1073
/*
 * Take the state lock of @sk1 and, when @sk2 is a different non-NULL
 * socket, of @sk2 as well.  Two distinct sockets are always locked
 * lower address first, so every caller uses the same order.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *lower = sk1;
	struct sock *higher = sk2;

	/* Only one socket involved: a single lock is enough. */
	if (!sk2 || unlikely(sk1 == sk2)) {
		unix_state_lock(sk1);
		return;
	}

	if (sk2 < sk1) {
		lower = sk2;
		higher = sk1;
	}

	unix_state_lock(lower);
	unix_state_lock_nested(higher);
}
1088
/*
 * Counterpart of unix_state_double_lock(): release the state lock of
 * @sk1 and, when @sk2 is a different non-NULL socket, of @sk2 too.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	/* A second, distinct socket was locked as well. */
	if (sk2 && !unlikely(sk1 == sk2))
		unix_state_unlock(sk2);
}
1098
/*
 * Set, replace or clear the default peer of a datagram socket.
 *
 * For an address whose family is not AF_UNSPEC the target is looked up
 * with unix_find_other() and, if both unix_may_send() and
 * security_unix_may_send() allow it, installed as unix_peer(sk).  For
 * AF_UNSPEC the peer is cleared.  A previously installed peer is released
 * in either case.
 *
 * Returns 0 on success, -EINVAL if @alen does not cover sa_family, -EPERM
 * if unix_may_send() refuses, or the error reported by unix_mkname(),
 * unix_autobind(), unix_find_other() or the security hook.
 */
1099static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1100                              int alen, int flags)
1101{
1102        struct sock *sk = sock->sk;
1103        struct net *net = sock_net(sk);
1104        struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1105        struct sock *other;
1106        unsigned int hash;
1107        int err;
1108
1109        err = -EINVAL;
1110        if (alen < offsetofend(struct sockaddr, sa_family))
1111                goto out;
1112
1113        if (addr->sa_family != AF_UNSPEC) {
1114                err = unix_mkname(sunaddr, alen, &hash);
1115                if (err < 0)
1116                        goto out;
1117                alen = err;
1118
                    /* With SOCK_PASSCRED set, a socket without an address is autobound first. */
1119                if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1120                    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1121                        goto out;
1122
1123restart:
1124                other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1125                if (!other)
1126                        goto out;
1127
1128                unix_state_double_lock(sk, other);
1129
1130                /* Apparently VFS overslept socket death. Retry. */
1131                if (sock_flag(other, SOCK_DEAD)) {
1132                        unix_state_double_unlock(sk, other);
1133                        sock_put(other);
1134                        goto restart;
1135                }
1136
1137                err = -EPERM;
1138                if (!unix_may_send(sk, other))
1139                        goto out_unlock;
1140
1141                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1142                if (err)
1143                        goto out_unlock;
1144
1145        } else {
1146                /*
1147                 *      1003.1g breaking connected state with AF_UNSPEC
1148                 */
1149                other = NULL;
                    /* With a NULL second socket only sk's own state lock is taken. */
1150                unix_state_double_lock(sk, other);
1151        }
1152
1153        /*
1154         * If it was connected, reconnect.
1155         */
1156        if (unix_peer(sk)) {
1157                struct sock *old_peer = unix_peer(sk);
1158                unix_peer(sk) = other;
1159                unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1160
1161                unix_state_double_unlock(sk, other);
1162
                    /* Reconnecting to the same peer is not treated as a disconnect. */
1163                if (other != old_peer)
1164                        unix_dgram_disconnected(sk, old_peer);
                    /* Drop the reference that was held through unix_peer(sk). */
1165                sock_put(old_peer);
1166        } else {
1167                unix_peer(sk) = other;
1168                unix_state_double_unlock(sk, other);
1169        }
            /* The reference obtained by the lookup now lives on in unix_peer(sk). */
1170        return 0;
1171
1172out_unlock:
1173        unix_state_double_unlock(sk, other);
1174        sock_put(other);
1175out:
1176        return err;
1177}
1178
1179static long unix_wait_for_peer(struct sock *other, long timeo)
1180{
1181        struct unix_sock *u = unix_sk(other);
1182        int sched;
1183        DEFINE_WAIT(wait);
1184
1185        prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1186
1187        sched = !sock_flag(other, SOCK_DEAD) &&
1188                !(other->sk_shutdown & RCV_SHUTDOWN) &&
1189                unix_recvq_full(other);
1190
1191        unix_state_unlock(other);
1192
1193        if (sched)
1194                timeo = schedule_timeout(timeo);
1195
1196        finish_wait(&u->peer_wait, &wait);
1197        return timeo;
1198}
1199
/*
 * Connect a connection-oriented socket to the listening socket named by
 * @uaddr.
 *
 * A new sock (newsk) and a one-byte skb are allocated up front.  Once a
 * target in TCP_LISTEN state with room in its receive queue is found,
 * newsk becomes the established peer of @sock's sk, takes over the
 * listener's address and path, and the skb is appended to the listener's
 * receive queue, after which the listener's sk_data_ready() is called.
 *
 * If the listener's queue is full the call waits in unix_wait_for_peer()
 * using the timeout from sock_sndtimeo(), or fails with -EAGAIN when that
 * timeout is zero.
 *
 * Returns 0 on success or a negative errno: among others -ENOMEM,
 * -ECONNREFUSED (target not listening, or shut down for receive),
 * -EISCONN, -EINVAL, and whatever unix_mkname(), unix_autobind(),
 * unix_find_other(), sock_intr_errno() or security_unix_stream_connect()
 * report.
 */
1200static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1201                               int addr_len, int flags)
1202{
1203        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1204        struct sock *sk = sock->sk;
1205        struct net *net = sock_net(sk);
1206        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1207        struct sock *newsk = NULL;
1208        struct sock *other = NULL;
1209        struct sk_buff *skb = NULL;
1210        unsigned int hash;
1211        int st;
1212        int err;
1213        long timeo;
1214
1215        err = unix_mkname(sunaddr, addr_len, &hash);
1216        if (err < 0)
1217                goto out;
1218        addr_len = err;
1219
            /* With SOCK_PASSCRED set, a socket without an address is autobound first. */
1220        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1221            (err = unix_autobind(sock)) != 0)
1222                goto out;
1223
1224        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1225
1226        /* First of all allocate resources.
1227           If we will make it after state is locked,
1228           we will have to recheck all again in any case.
1229         */
1230
1231        err = -ENOMEM;
1232
1233        /* create new sock for complete connection */
1234        newsk = unix_create1(sock_net(sk), NULL, 0);
1235        if (newsk == NULL)
1236                goto out;
1237
1238        /* Allocate skb for sending to listening sock */
            /*
             * NOTE(review): presumably sock_wmalloc() records newsk as the
             * skb's owner, which is how unix_accept() finds it via skb->sk --
             * confirm.
             */
1239        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1240        if (skb == NULL)
1241                goto out;
1242
1243restart:
1244        /*  Find listening sock. */
1245        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1246        if (!other)
1247                goto out;
1248
1249        /* Latch state of peer */
1250        unix_state_lock(other);
1251
1252        /* Apparently VFS overslept socket death. Retry. */
1253        if (sock_flag(other, SOCK_DEAD)) {
1254                unix_state_unlock(other);
1255                sock_put(other);
1256                goto restart;
1257        }
1258
1259        err = -ECONNREFUSED;
1260        if (other->sk_state != TCP_LISTEN)
1261                goto out_unlock;
1262        if (other->sk_shutdown & RCV_SHUTDOWN)
1263                goto out_unlock;
1264
1265        if (unix_recvq_full(other)) {
1266                err = -EAGAIN;
1267                if (!timeo)
1268                        goto out_unlock;
1269
                    /*
                     * unix_wait_for_peer() releases other's state lock, so the
                     * exits below go to "out"/"restart" rather than "out_unlock".
                     */
1270                timeo = unix_wait_for_peer(other, timeo);
1271
1272                err = sock_intr_errno(timeo);
1273                if (signal_pending(current))
1274                        goto out;
1275                sock_put(other);
1276                goto restart;
1277        }
1278
1279        /* Latch our state.
1280
1281           It is tricky place. We need to grab our state lock and cannot
1282           drop lock on peer. It is dangerous because deadlock is
1283           possible. Connect to self case and simultaneous
1284           attempt to connect are eliminated by checking socket
1285           state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1286           check this before attempt to grab lock.
1287
1288           Well, and we have to recheck the state after socket locked.
1289         */
1290        st = sk->sk_state;
1291
1292        switch (st) {
1293        case TCP_CLOSE:
1294                /* This is ok... continue with connect */
1295                break;
1296        case TCP_ESTABLISHED:
1297                /* Socket is already connected */
1298                err = -EISCONN;
1299                goto out_unlock;
1300        default:
1301                err = -EINVAL;
1302                goto out_unlock;
1303        }
1304
1305        unix_state_lock_nested(sk);
1306
            /* Our state changed between the unlocked read and taking the lock: start over. */
1307        if (sk->sk_state != st) {
1308                unix_state_unlock(sk);
1309                unix_state_unlock(other);
1310                sock_put(other);
1311                goto restart;
1312        }
1313
1314        err = security_unix_stream_connect(sk, other, newsk);
1315        if (err) {
1316                unix_state_unlock(sk);
1317                goto out_unlock;
1318        }
1319
1320        /* The way is open! Quickly set all the necessary fields... */
1321
            /* newsk holds a reference on sk for as long as sk is its peer. */
1322        sock_hold(sk);
1323        unix_peer(newsk)        = sk;
1324        newsk->sk_state         = TCP_ESTABLISHED;
1325        newsk->sk_type          = sk->sk_type;
1326        init_peercred(newsk);
1327        newu = unix_sk(newsk);
1328        RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1329        otheru = unix_sk(other);
1330
1331        /* copy address information from listening to new sock
1332         *
1333         * The contents of *(otheru->addr) and otheru->path
1334         * are seen fully set up here, since we have found
1335         * otheru in hash under unix_table_lock.  Insertion
1336         * into the hash chain we'd found it in had been done
1337         * in an earlier critical area protected by unix_table_lock,
1338         * the same one where we'd set *(otheru->addr) contents,
1339         * as well as otheru->path and otheru->addr itself.
1340         *
1341         * Using smp_store_release() here to set newu->addr
1342         * is enough to make those stores, as well as stores
1343         * to newu->path visible to anyone who gets newu->addr
1344         * by smp_load_acquire().  IOW, the same warranties
1345         * as for unix_sock instances bound in unix_bind() or
1346         * in unix_autobind().
1347         */
1348        if (otheru->path.dentry) {
1349                path_get(&otheru->path);
1350                newu->path = otheru->path;
1351        }
1352        refcount_inc(&otheru->addr->refcnt);
1353        smp_store_release(&newu->addr, otheru->addr);
1354
1355        /* Set credentials */
1356        copy_peercred(sk, other);
1357
1358        sock->state     = SS_CONNECTED;
1359        sk->sk_state    = TCP_ESTABLISHED;
            /* sk holds a reference on newsk for as long as newsk is its peer. */
1360        sock_hold(newsk);
1361
1362        smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1363        unix_peer(sk)   = newsk;
1364
1365        unix_state_unlock(sk);
1366
1367        /* take ten and send info to listening sock */
1368        spin_lock(&other->sk_receive_queue.lock);
1369        __skb_queue_tail(&other->sk_receive_queue, skb);
1370        spin_unlock(&other->sk_receive_queue.lock);
1371        unix_state_unlock(other);
1372        other->sk_data_ready(other);
1373        sock_put(other);
1374        return 0;
1375
1376out_unlock:
1377        if (other)
1378                unix_state_unlock(other);
1379
1380out:
            /* Common failure exit: release whatever was allocated or looked up so far. */
1381        kfree_skb(skb);
1382        if (newsk)
1383                unix_release_sock(newsk, 0);
1384        if (other)
1385                sock_put(other);
1386        return err;
1387}
1388
1389static int unix_socketpair(struct socket *socka, struct socket *sockb)
1390{
1391        struct sock *ska = socka->sk, *skb = sockb->sk;
1392
1393        /* Join our sockets back to back */
1394        sock_hold(ska);
1395        sock_hold(skb);
1396        unix_peer(ska) = skb;
1397        unix_peer(skb) = ska;
1398        init_peercred(ska);
1399        init_peercred(skb);
1400
1401        if (ska->sk_type != SOCK_DGRAM) {
1402                ska->sk_state = TCP_ESTABLISHED;
1403                skb->sk_state = TCP_ESTABLISHED;
1404                socka->state  = SS_CONNECTED;
1405                sockb->state  = SS_CONNECTED;
1406        }
1407        return 0;
1408}
1409
1410static void unix_sock_inherit_flags(const struct socket *old,
1411                                    struct socket *new)
1412{
1413        if (test_bit(SOCK_PASSCRED, &old->flags))
1414                set_bit(SOCK_PASSCRED, &new->flags);
1415        if (test_bit(SOCK_PASSSEC, &old->flags))
1416                set_bit(SOCK_PASSSEC, &new->flags);
1417}
1418
1419static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1420                       bool kern)
1421{
1422        struct sock *sk = sock->sk;
1423        struct sock *tsk;
1424        struct sk_buff *skb;
1425        int err;
1426
1427        err = -EOPNOTSUPP;
1428        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1429                goto out;
1430
1431        err = -EINVAL;
1432        if (sk->sk_state != TCP_LISTEN)
1433                goto out;
1434
1435        /* If socket state is TCP_LISTEN it cannot change (for now...),
1436         * so that no locks are necessary.
1437         */
1438
1439        skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1440        if (!skb) {
1441                /* This means receive shutdown. */
1442                if (err == 0)
1443                        err = -EINVAL;
1444                goto out;
1445        }
1446
1447        tsk = skb->sk;
1448        skb_free_datagram(sk, skb);
1449        wake_up_interruptible(&unix_sk(sk)->peer_wait);
1450
1451        /* attach accepted sock to socket */
1452        unix_state_lock(tsk);
1453        newsock->state = SS_CONNECTED;
1454        unix_sock_inherit_flags(sock, newsock);
1455        sock_graft(tsk, newsock);
1456        unix_state_unlock(tsk);
1457        return 0;
1458
1459out:
1460        return err;
1461}
1462
1463
1464static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1465{
1466        struct sock *sk = sock->sk;
1467        struct unix_address *addr;
1468        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1469        int err = 0;
1470
1471        if (peer) {
1472                sk = unix_peer_get(sk);
1473
1474                err = -ENOTCONN;
1475                if (!sk)
1476                        goto out;
1477                err = 0;
1478        } else {
1479                sock_hold(sk);
1480        }
1481
1482        addr = smp_load_acquire(&unix_sk(sk)->addr);
1483        if (!addr) {
1484                sunaddr->sun_family = AF_UNIX;
1485                sunaddr->sun_path[0] = 0;
1486                err = sizeof(short);
1487        } else {
1488                err = addr->len;
1489                memcpy(sunaddr, addr->name, addr->len);
1490        }
1491        sock_put(sk);
1492out:
1493        return err;
1494}
1495
1496static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1497{
1498        int err = 0;
1499
1500        UNIXCB(skb).pid  = get_pid(scm->pid);
1501        UNIXCB(skb).uid = scm->creds.uid;
1502        UNIXCB(skb).gid = scm->creds.gid;
1503        UNIXCB(skb).fp = NULL;
1504        unix_get_secdata(scm, skb);
1505        if (scm->fp && send_fds)
1506                err = unix_attach_fds(scm, skb);
1507
1508        skb->destructor = unix_destruct_scm;
1509        return err;
1510}
1511
1512static bool unix_passcred_enabled(const struct socket *sock,
1513                                  const struct sock *other)
1514{
1515        return test_bit(SOCK_PASSCRED, &sock->flags) ||
1516               !other->sk_socket ||
1517               test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1518}
1519
1520/*
1521 * Some apps rely on write() giving SCM_CREDENTIALS
1522 * We include credentials if source or destination socket
1523 * asserted SOCK_PASSCRED.
1524 */
1525static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1526                            const struct sock *other)
1527{
1528        if (UNIXCB(skb).pid)
1529                return;
1530        if (unix_passcred_enabled(sock, other)) {
1531                UNIXCB(skb).pid  = get_pid(task_tgid(current));
1532                current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1533        }
1534}
1535
1536static int maybe_init_creds(struct scm_cookie *scm,
1537                            struct socket *socket,
1538                            const struct sock *other)
1539{
1540        int err;
1541        struct msghdr msg = { .msg_controllen = 0 };
1542
1543        err = scm_send(socket, &msg, scm, false);
1544        if (err)
1545                return err;
1546
1547        if (unix_passcred_enabled(socket, other)) {
1548                scm->pid = get_pid(task_tgid(current));
1549                current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1550        }
1551        return err;
1552}
1553
1554static bool unix_skb_scm_eq(struct sk_buff *skb,
1555                            struct scm_cookie *scm)
1556{
1557        const struct unix_skb_parms *u = &UNIXCB(skb);
1558
1559        return u->pid == scm->pid &&
1560               uid_eq(u->uid, scm->creds.uid) &&
1561               gid_eq(u->gid, scm->creds.gid) &&
1562               unix_secdata_eq(scm, skb);
1563}
1564
1565/*
1566 *      Send AF_UNIX data.
1567 */
1568
/*
 * Queue a single message of @len bytes on the receive queue of the
 * destination socket.
 *
 * The destination is msg->msg_name when msg->msg_namelen is non-zero,
 * otherwise the connected peer (-ENOTCONN if there is none).  MSG_OOB is
 * rejected with -EOPNOTSUPP and messages larger than sk_sndbuf - 32 with
 * -EMSGSIZE.  When the destination's receive queue is full the call
 * either waits in unix_wait_for_peer() or, with a zero timeout, fails
 * with -EAGAIN.
 *
 * Returns @len on success (also when sk_filter() rejects the message,
 * which is then silently dropped) or a negative errno.
 */
1569static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1570                              size_t len)
1571{
1572        struct sock *sk = sock->sk;
1573        struct net *net = sock_net(sk);
1574        struct unix_sock *u = unix_sk(sk);
1575        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1576        struct sock *other = NULL;
1577        int namelen = 0; /* fake GCC */
1578        int err;
1579        unsigned int hash;
1580        struct sk_buff *skb;
1581        long timeo;
1582        struct scm_cookie scm;
1583        int data_len = 0;
1584        int sk_locked;
1585
1586        wait_for_unix_gc();
1587        err = scm_send(sock, msg, &scm, false);
1588        if (err < 0)
1589                return err;
1590
1591        err = -EOPNOTSUPP;
1592        if (msg->msg_flags&MSG_OOB)
1593                goto out;
1594
1595        if (msg->msg_namelen) {
1596                err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1597                if (err < 0)
1598                        goto out;
1599                namelen = err;
1600        } else {
                    /* No explicit destination: use the connected peer. */
1601                sunaddr = NULL;
1602                err = -ENOTCONN;
1603                other = unix_peer_get(sk);
1604                if (!other)
1605                        goto out;
1606        }
1607
            /* With SOCK_PASSCRED set, a socket without an address is autobound first. */
1608        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1609            && (err = unix_autobind(sock)) != 0)
1610                goto out;
1611
1612        err = -EMSGSIZE;
1613        if (len > sk->sk_sndbuf - 32)
1614                goto out;
1615
            /*
             * Whatever exceeds SKB_MAX_ALLOC goes into the paged part of the
             * skb: capped at MAX_SKB_FRAGS pages and rounded up to a page
             * boundary.
             */
1616        if (len > SKB_MAX_ALLOC) {
1617                data_len = min_t(size_t,
1618                                 len - SKB_MAX_ALLOC,
1619                                 MAX_SKB_FRAGS * PAGE_SIZE);
1620                data_len = PAGE_ALIGN(data_len);
1621
1622                BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1623        }
1624
1625        skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1626                                   msg->msg_flags & MSG_DONTWAIT, &err,
1627                                   PAGE_ALLOC_COSTLY_ORDER);
1628        if (skb == NULL)
1629                goto out;
1630
1631        err = unix_scm_to_skb(&scm, skb, true);
1632        if (err < 0)
1633                goto out_free;
1634
1635        skb_put(skb, len - data_len);
1636        skb->data_len = data_len;
1637        skb->len = len;
1638        err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1639        if (err)
1640                goto out_free;
1641
1642        timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1643
1644restart:
            /*
             * other is NULL here when an explicit address was given, or when
             * the previous target turned out to be dead and was dropped below.
             */
1645        if (!other) {
1646                err = -ECONNRESET;
1647                if (sunaddr == NULL)
1648                        goto out_free;
1649
1650                other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1651                                        hash, &err);
1652                if (other == NULL)
1653                        goto out_free;
1654        }
1655
1656        if (sk_filter(other, skb) < 0) {
1657                /* Toss the packet but do not return any error to the sender */
1658                err = len;
1659                goto out_free;
1660        }
1661
            /* sk_locked records whether sk's state lock is held in addition to other's. */
1662        sk_locked = 0;
1663        unix_state_lock(other);
1664restart_locked:
1665        err = -EPERM;
1666        if (!unix_may_send(sk, other))
1667                goto out_unlock;
1668
1669        if (unlikely(sock_flag(other, SOCK_DEAD))) {
1670                /*
1671                 *      Check with 1003.1g - what should
1672                 *      datagram error
1673                 */
1674                unix_state_unlock(other);
                    /* Drop the reference taken when other was looked up or fetched. */
1675                sock_put(other);
1676
1677                if (!sk_locked)
1678                        unix_state_lock(sk);
1679
1680                err = 0;
1681                if (unix_peer(sk) == other) {
                            /* The dead socket was our connected peer: disconnect from it. */
1682                        unix_peer(sk) = NULL;
1683                        unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1684
1685                        unix_state_unlock(sk);
1686
1687                        unix_dgram_disconnected(sk, other);
                            /* Drop the reference that was held through unix_peer(sk). */
1688                        sock_put(other);
1689                        err = -ECONNREFUSED;
1690                } else {
1691                        unix_state_unlock(sk);
1692                }
1693
1694                other = NULL;
1695                if (err)
1696                        goto out_free;
1697                goto restart;
1698        }
1699
1700        err = -EPIPE;
1701        if (other->sk_shutdown & RCV_SHUTDOWN)
1702                goto out_unlock;
1703
1704        if (sk->sk_type != SOCK_SEQPACKET) {
1705                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1706                if (err)
1707                        goto out_unlock;
1708        }
1709
1710        /* other == sk && unix_peer(other) != sk if
1711         * - unix_peer(sk) == NULL, destination address bound to sk
1712         * - unix_peer(sk) == sk by time of get but disconnected before lock
1713         */
1714        if (other != sk &&
1715            unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1716                if (timeo) {
                            /*
                             * unix_wait_for_peer() releases other's state lock,
                             * so the exits below skip "out_unlock".
                             */
1717                        timeo = unix_wait_for_peer(other, timeo);
1718
1719                        err = sock_intr_errno(timeo);
1720                        if (signal_pending(current))
1721                                goto out_free;
1722
1723                        goto restart;
1724                }
1725
                    /*
                     * Zero timeout: both state locks are needed for the checks
                     * below, so other's lock is dropped and both are retaken
                     * through unix_state_double_lock().
                     */
1726                if (!sk_locked) {
1727                        unix_state_unlock(other);
1728                        unix_state_double_lock(sk, other);
1729                }
1730
1731                if (unix_peer(sk) != other ||
1732                    unix_dgram_peer_wake_me(sk, other)) {
1733                        err = -EAGAIN;
1734                        sk_locked = 1;
1735                        goto out_unlock;
1736                }
1737
                    /* other's lock was dropped meanwhile: redo the checks with both locks held. */
1738                if (!sk_locked) {
1739                        sk_locked = 1;
1740                        goto restart_locked;
1741                }
1742        }
1743
1744        if (unlikely(sk_locked))
1745                unix_state_unlock(sk);
1746
1747        if (sock_flag(other, SOCK_RCVTSTAMP))
1748                __net_timestamp(skb);
1749        maybe_add_creds(skb, sock, other);
1750        skb_queue_tail(&other->sk_receive_queue, skb);
1751        unix_state_unlock(other);
1752        other->sk_data_ready(other);
1753        sock_put(other);
1754        scm_destroy(&scm);
1755        return len;
1756
1757out_unlock:
1758        if (sk_locked)
1759                unix_state_unlock(sk);
1760        unix_state_unlock(other);
1761out_free:
1762        kfree_skb(skb);
1763out:
1764        if (other)
1765                sock_put(other);
1766        scm_destroy(&scm);
1767        return err;
1768}
1769
1770/* We use paged skbs for stream sockets, and limit occupancy to 32768
1771 * bytes, and a minimum of a full page.
1772 */
1773#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1774
1775static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1776                               size_t len)
1777{
1778        struct sock *sk = sock->sk;
1779        struct sock *other = NULL;
1780        int err, size;
1781        struct sk_buff *skb;
1782        int sent = 0;
1783        struct scm_cookie scm;
1784        bool fds_sent = false;
1785        int data_len;
1786
1787        wait_for_unix_gc();
1788        err = scm_send(sock, msg, &scm, false);
1789        if (err < 0)
1790                return err;
1791
1792        err = -EOPNOTSUPP;
1793        if (msg->msg_flags&MSG_OOB)
1794                goto out_err;
1795
1796        if (msg->msg_namelen) {
1797                err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1798                goto out_err;
1799        } else {
1800                err = -ENOTCONN;
1801                other = unix_peer(sk);
1802                if (!other)
1803                        goto out_err;
1804        }
1805
1806        if (sk->sk_shutdown & SEND_SHUTDOWN)
1807                goto pipe_err;
1808
1809        while (sent < len) {
1810                size = len - sent;
1811
1812                /* Keep two messages in the pipe so it schedules better */
1813                size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1814
1815                /* allow fallback to order-0 allocations */
1816                size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1817
1818                data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1819
1820                data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1821
1822                skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1823                                           msg->msg_flags & MSG_DONTWAIT, &err,
1824                                           get_order(UNIX_SKB_FRAGS_SZ));
1825                if (!skb)
1826                        goto out_err;
1827
1828                /* Only send the fds in the first buffer */
1829                err = unix_scm_to_skb(&scm, skb, !fds_sent);
1830                if (err < 0) {
1831                        kfree_skb(skb);
1832                        goto out_err;
1833                }
1834                fds_sent = true;
1835
1836                skb_put(skb, size - data_len);
1837                skb->data_len = data_len;
1838                skb->len = size;
1839                err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1840                if (err) {
1841                        kfree_skb(skb);
1842                        goto out_err;
1843                }
1844
1845                unix_state_lock(other);
1846
1847                if (sock_flag(other, SOCK_DEAD) ||
1848                    (other->sk_shutdown & RCV_SHUTDOWN))
1849                        goto pipe_err_free;
1850
1851                maybe_add_creds(skb, sock, other);
1852                skb_queue_tail(&other->sk_receive_queue, skb);
1853                unix_state_unlock(other);
1854                other->sk_data_ready(other);
1855                sent += size;
1856        }
1857
1858        scm_destroy(&scm);
1859
1860        return sent;
1861
1862pipe_err_free:
1863        unix_state_unlock(other);
1864        kfree_skb(skb);
1865pipe_err:
1866        if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1867                send_sig(SIGPIPE, current, 0);
1868        err = -EPIPE;
1869out_err:
1870        scm_destroy(&scm);
1871        return sent ? : err;
1872}
1873
1874static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1875                                    int offset, size_t size, int flags)
1876{
1877        int err;
1878        bool send_sigpipe = false;
1879        bool init_scm = true;
1880        struct scm_cookie scm;
1881        struct sock *other, *sk = socket->sk;
1882        struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1883
1884        if (flags & MSG_OOB)
1885                return -EOPNOTSUPP;
1886
1887        other = unix_peer(sk);
1888        if (!other || sk->sk_state != TCP_ESTABLISHED)
1889                return -ENOTCONN;
1890
1891        if (false) {
1892alloc_skb:
1893                unix_state_unlock(other);
1894                mutex_unlock(&unix_sk(other)->iolock);
1895                newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1896                                              &err, 0);
1897                if (!newskb)
1898                        goto err;
1899        }
1900
1901        /* we must acquire iolock as we modify already present
1902         * skbs in the sk_receive_queue and mess with skb->len
1903         */
1904        err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1905        if (err) {
1906                err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1907                goto err;
1908        }
1909
1910        if (sk->sk_shutdown & SEND_SHUTDOWN) {
1911                err = -EPIPE;
1912                send_sigpipe = true;
1913                goto err_unlock;
1914        }
1915
1916        unix_state_lock(other);
1917
1918        if (sock_flag(other, SOCK_DEAD) ||
1919            other->sk_shutdown & RCV_SHUTDOWN) {
1920                err = -EPIPE;
1921                send_sigpipe = true;
1922                goto err_state_unlock;
1923        }
1924
1925        if (init_scm) {
1926                err = maybe_init_creds(&scm, socket, other);
1927                if (err)
1928                        goto err_state_unlock;
1929                init_scm = false;
1930        }
1931
1932        skb = skb_peek_tail(&other->sk_receive_queue);
1933        if (tail && tail == skb) {
1934                skb = newskb;
1935        } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1936                if (newskb) {
1937                        skb = newskb;
1938                } else {
1939                        tail = skb;
1940                        goto alloc_skb;
1941                }
1942        } else if (newskb) {
1943                /* this is fast path, we don't necessarily need to
1944                 * call to kfree_skb even though with newskb == NULL
1945                 * this - does no harm
1946                 */
1947                consume_skb(newskb);
1948                newskb = NULL;
1949        }
1950
1951        if (skb_append_pagefrags(skb, page, offset, size)) {
1952                tail = skb;
1953                goto alloc_skb;
1954        }
1955
1956        skb->len += size;
1957        skb->data_len += size;
1958        skb->truesize += size;
1959        refcount_add(size, &sk->sk_wmem_alloc);
1960
1961        if (newskb) {
1962                err = unix_scm_to_skb(&scm, skb, false);
1963                if (err)
1964                        goto err_state_unlock;
1965                spin_lock(&other->sk_receive_queue.lock);
1966                __skb_queue_tail(&other->sk_receive_queue, newskb);
1967                spin_unlock(&other->sk_receive_queue.lock);
1968        }
1969
1970        unix_state_unlock(other);
1971        mutex_unlock(&unix_sk(other)->iolock);
1972
1973        other->sk_data_ready(other);
1974        scm_destroy(&scm);
1975        return size;
1976
1977err_state_unlock:
1978        unix_state_unlock(other);
1979err_unlock:
1980        mutex_unlock(&unix_sk(other)->iolock);
1981err:
1982        kfree_skb(newskb);
1983        if (send_sigpipe && !(flags & MSG_NOSIGNAL))
1984                send_sig(SIGPIPE, current, 0);
1985        if (!init_scm)
1986                scm_destroy(&scm);
1987        return err;
1988}
1989
1990static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
1991                                  size_t len)
1992{
1993        int err;
1994        struct sock *sk = sock->sk;
1995
1996        err = sock_error(sk);
1997        if (err)
1998                return err;
1999
2000        if (sk->sk_state != TCP_ESTABLISHED)
2001                return -ENOTCONN;
2002
2003        if (msg->msg_namelen)
2004                msg->msg_namelen = 0;
2005
2006        return unix_dgram_sendmsg(sock, msg, len);
2007}
2008
2009static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2010                                  size_t size, int flags)
2011{
2012        struct sock *sk = sock->sk;
2013
2014        if (sk->sk_state != TCP_ESTABLISHED)
2015                return -ENOTCONN;
2016
2017        return unix_dgram_recvmsg(sock, msg, size, flags);
2018}
2019
2020static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2021{
2022        struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2023
2024        if (addr) {
2025                msg->msg_namelen = addr->len;
2026                memcpy(msg->msg_name, addr->name, addr->len);
2027        }
2028}
2029
2030static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2031                              size_t size, int flags)
2032{
2033        struct scm_cookie scm;
2034        struct sock *sk = sock->sk;
2035        struct unix_sock *u = unix_sk(sk);
2036        struct sk_buff *skb, *last;
2037        long timeo;
2038        int skip;
2039        int err;
2040
2041        err = -EOPNOTSUPP;
2042        if (flags&MSG_OOB)
2043                goto out;
2044
2045        timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2046
2047        do {
2048                mutex_lock(&u->iolock);
2049
2050                skip = sk_peek_offset(sk, flags);
2051                skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
2052                                              &last);
2053                if (skb)
2054                        break;
2055
2056                mutex_unlock(&u->iolock);
2057
2058                if (err != -EAGAIN)
2059                        break;
2060        } while (timeo &&
2061                 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2062
2063        if (!skb) { /* implies iolock unlocked */
2064                unix_state_lock(sk);
2065                /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2066                if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2067                    (sk->sk_shutdown & RCV_SHUTDOWN))
2068                        err = 0;
2069                unix_state_unlock(sk);
2070                goto out;
2071        }
2072
2073        if (wq_has_sleeper(&u->peer_wait))
2074                wake_up_interruptible_sync_poll(&u->peer_wait,
2075                                                EPOLLOUT | EPOLLWRNORM |
2076                                                EPOLLWRBAND);
2077
2078        if (msg->msg_name)
2079                unix_copy_addr(msg, skb->sk);
2080
2081        if (size > skb->len - skip)
2082                size = skb->len - skip;
2083        else if (size < skb->len - skip)
2084                msg->msg_flags |= MSG_TRUNC;
2085
2086        err = skb_copy_datagram_msg(skb, skip, msg, size);
2087        if (err)
2088                goto out_free;
2089
2090        if (sock_flag(sk, SOCK_RCVTSTAMP))
2091                __sock_recv_timestamp(msg, sk, skb);
2092
2093        memset(&scm, 0, sizeof(scm));
2094
2095        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2096        unix_set_secdata(&scm, skb);
2097
2098        if (!(flags & MSG_PEEK)) {
2099                if (UNIXCB(skb).fp)
2100                        unix_detach_fds(&scm, skb);
2101
2102                sk_peek_offset_bwd(sk, skb->len);
2103        } else {
2104                /* It is questionable: on PEEK we could:
2105                   - do not return fds - good, but too simple 8)
2106                   - return fds, and do not return them on read (old strategy,
2107                     apparently wrong)
2108                   - clone fds (I chose it for now, it is the most universal
2109                     solution)
2110
2111                   POSIX 1003.1g does not actually define this clearly
2112                   at all. POSIX 1003.1g doesn't define a lot of things
2113                   clearly however!
2114
2115                */
2116
2117                sk_peek_offset_fwd(sk, size);
2118
2119                if (UNIXCB(skb).fp)
2120                        scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2121        }
2122        err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2123
2124        scm_recv(sock, msg, &scm, flags);
2125
2126out_free:
2127        skb_free_datagram(sk, skb);
2128        mutex_unlock(&u->iolock);
2129out:
2130        return err;
2131}
2132
2133/*
2134 *      Sleep until more data has arrived. But check for races..
2135 */
2136static long unix_stream_data_wait(struct sock *sk, long timeo,
2137                                  struct sk_buff *last, unsigned int last_len,
2138                                  bool freezable)
2139{
2140        struct sk_buff *tail;
2141        DEFINE_WAIT(wait);
2142
2143        unix_state_lock(sk);
2144
2145        for (;;) {
2146                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2147
2148                tail = skb_peek_tail(&sk->sk_receive_queue);
2149                if (tail != last ||
2150                    (tail && tail->len != last_len) ||
2151                    sk->sk_err ||
2152                    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2153                    signal_pending(current) ||
2154                    !timeo)
2155                        break;
2156
2157                sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2158                unix_state_unlock(sk);
2159                if (freezable)
2160                        timeo = freezable_schedule_timeout(timeo);
2161                else
2162                        timeo = schedule_timeout(timeo);
2163                unix_state_lock(sk);
2164
2165                if (sock_flag(sk, SOCK_DEAD))
2166                        break;
2167
2168                sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2169        }
2170
2171        finish_wait(sk_sleep(sk), &wait);
2172        unix_state_unlock(sk);
2173        return timeo;
2174}
2175
2176static unsigned int unix_skb_len(const struct sk_buff *skb)
2177{
2178        return skb->len - UNIXCB(skb).consumed;
2179}
2180
2181struct unix_stream_read_state {
2182        int (*recv_actor)(struct sk_buff *, int, int,
2183                          struct unix_stream_read_state *);
2184        struct socket *socket;
2185        struct msghdr *msg;
2186        struct pipe_inode_info *pipe;
2187        size_t size;
2188        int flags;
2189        unsigned int splice_flags;
2190};
2191
2192static int unix_stream_read_generic(struct unix_stream_read_state *state,
2193                                    bool freezable)
2194{
2195        struct scm_cookie scm;
2196        struct socket *sock = state->socket;
2197        struct sock *sk = sock->sk;
2198        struct unix_sock *u = unix_sk(sk);
2199        int copied = 0;
2200        int flags = state->flags;
2201        int noblock = flags & MSG_DONTWAIT;
2202        bool check_creds = false;
2203        int target;
2204        int err = 0;
2205        long timeo;
2206        int skip;
2207        size_t size = state->size;
2208        unsigned int last_len;
2209
2210        if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2211                err = -EINVAL;
2212                goto out;
2213        }
2214
2215        if (unlikely(flags & MSG_OOB)) {
2216                err = -EOPNOTSUPP;
2217                goto out;
2218        }
2219
2220        target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2221        timeo = sock_rcvtimeo(sk, noblock);
2222
2223        memset(&scm, 0, sizeof(scm));
2224
2225        /* Lock the socket to prevent queue disordering
2226         * while sleeps in memcpy_tomsg
2227         */
2228        mutex_lock(&u->iolock);
2229
2230        skip = max(sk_peek_offset(sk, flags), 0);
2231
2232        do {
2233                int chunk;
2234                bool drop_skb;
2235                struct sk_buff *skb, *last;
2236
2237redo:
2238                unix_state_lock(sk);
2239                if (sock_flag(sk, SOCK_DEAD)) {
2240                        err = -ECONNRESET;
2241                        goto unlock;
2242                }
2243                last = skb = skb_peek(&sk->sk_receive_queue);
2244                last_len = last ? last->len : 0;
2245again:
2246                if (skb == NULL) {
2247                        if (copied >= target)
2248                                goto unlock;
2249
2250                        /*
2251                         *      POSIX 1003.1g mandates this order.
2252                         */
2253
2254                        err = sock_error(sk);
2255                        if (err)
2256                                goto unlock;
2257                        if (sk->sk_shutdown & RCV_SHUTDOWN)
2258                                goto unlock;
2259
2260                        unix_state_unlock(sk);
2261                        if (!timeo) {
2262                                err = -EAGAIN;
2263                                break;
2264                        }
2265
2266                        mutex_unlock(&u->iolock);
2267
2268                        timeo = unix_stream_data_wait(sk, timeo, last,
2269                                                      last_len, freezable);
2270
2271                        if (signal_pending(current)) {
2272                                err = sock_intr_errno(timeo);
2273                                scm_destroy(&scm);
2274                                goto out;
2275                        }
2276
2277                        mutex_lock(&u->iolock);
2278                        goto redo;
2279unlock:
2280                        unix_state_unlock(sk);
2281                        break;
2282                }
2283
2284                while (skip >= unix_skb_len(skb)) {
2285                        skip -= unix_skb_len(skb);
2286                        last = skb;
2287                        last_len = skb->len;
2288                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
2289                        if (!skb)
2290                                goto again;
2291                }
2292
2293                unix_state_unlock(sk);
2294
2295                if (check_creds) {
2296                        /* Never glue messages from different writers */
2297                        if (!unix_skb_scm_eq(skb, &scm))
2298                                break;
2299                } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2300                        /* Copy credentials */
2301                        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2302                        unix_set_secdata(&scm, skb);
2303                        check_creds = true;
2304                }
2305
2306                /* Copy address just once */
2307                if (state->msg && state->msg->msg_name) {
2308                        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2309                                         state->msg->msg_name);
2310                        unix_copy_addr(state->msg, skb->sk);
2311                        sunaddr = NULL;
2312                }
2313
2314                chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2315                skb_get(skb);
2316                chunk = state->recv_actor(skb, skip, chunk, state);
2317                drop_skb = !unix_skb_len(skb);
2318                /* skb is only safe to use if !drop_skb */
2319                consume_skb(skb);
2320                if (chunk < 0) {
2321                        if (copied == 0)
2322                                copied = -EFAULT;
2323                        break;
2324                }
2325                copied += chunk;
2326                size -= chunk;
2327
2328                if (drop_skb) {
2329                        /* the skb was touched by a concurrent reader;
2330                         * we should not expect anything from this skb
2331                         * anymore and assume it invalid - we can be
2332                         * sure it was dropped from the socket queue
2333                         *
2334                         * let's report a short read
2335                         */
2336                        err = 0;
2337                        break;
2338                }
2339
2340                /* Mark read part of skb as used */
2341                if (!(flags & MSG_PEEK)) {
2342                        UNIXCB(skb).consumed += chunk;
2343
2344                        sk_peek_offset_bwd(sk, chunk);
2345
2346                        if (UNIXCB(skb).fp)
2347                                unix_detach_fds(&scm, skb);
2348
2349                        if (unix_skb_len(skb))
2350                                break;
2351
2352                        skb_unlink(skb, &sk->sk_receive_queue);
2353                        consume_skb(skb);
2354
2355                        if (scm.fp)
2356                                break;
2357                } else {
2358                        /* It is questionable, see note in unix_dgram_recvmsg.
2359                         */
2360                        if (UNIXCB(skb).fp)
2361                                scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2362
2363                        sk_peek_offset_fwd(sk, chunk);
2364
2365                        if (UNIXCB(skb).fp)
2366                                break;
2367
2368                        skip = 0;
2369                        last = skb;
2370                        last_len = skb->len;
2371                        unix_state_lock(sk);
2372                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
2373                        if (skb)
2374                                goto again;
2375                        unix_state_unlock(sk);
2376                        break;
2377                }
2378        } while (size);
2379
2380        mutex_unlock(&u->iolock);
2381        if (state->msg)
2382                scm_recv(sock, state->msg, &scm, flags);
2383        else
2384                scm_destroy(&scm);
2385out:
2386        return copied ? : err;
2387}
2388
2389static int unix_stream_read_actor(struct sk_buff *skb,
2390                                  int skip, int chunk,
2391                                  struct unix_stream_read_state *state)
2392{
2393        int ret;
2394
2395        ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2396                                    state->msg, chunk);
2397        return ret ?: chunk;
2398}
2399
2400static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2401                               size_t size, int flags)
2402{
2403        struct unix_stream_read_state state = {
2404                .recv_actor = unix_stream_read_actor,
2405                .socket = sock,
2406                .msg = msg,
2407                .size = size,
2408                .flags = flags
2409        };
2410
2411        return unix_stream_read_generic(&state, true);
2412}
2413
2414static int unix_stream_splice_actor(struct sk_buff *skb,
2415                                    int skip, int chunk,
2416                                    struct unix_stream_read_state *state)
2417{
2418        return skb_splice_bits(skb, state->socket->sk,
2419                               UNIXCB(skb).consumed + skip,
2420                               state->pipe, chunk, state->splice_flags);
2421}
2422
2423static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2424                                       struct pipe_inode_info *pipe,
2425                                       size_t size, unsigned int flags)
2426{
2427        struct unix_stream_read_state state = {
2428                .recv_actor = unix_stream_splice_actor,
2429                .socket = sock,
2430                .pipe = pipe,
2431                .size = size,
2432                .splice_flags = flags,
2433        };
2434
2435        if (unlikely(*ppos))
2436                return -ESPIPE;
2437
2438        if (sock->file->f_flags & O_NONBLOCK ||
2439            flags & SPLICE_F_NONBLOCK)
2440                state.flags = MSG_DONTWAIT;
2441
2442        return unix_stream_read_generic(&state, false);
2443}
2444
2445static int unix_shutdown(struct socket *sock, int mode)
2446{
2447        struct sock *sk = sock->sk;
2448        struct sock *other;
2449
2450        if (mode < SHUT_RD || mode > SHUT_RDWR)
2451                return -EINVAL;
2452        /* This maps:
2453         * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2454         * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2455         * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2456         */
2457        ++mode;
2458
2459        unix_state_lock(sk);
2460        sk->sk_shutdown |= mode;
2461        other = unix_peer(sk);
2462        if (other)
2463                sock_hold(other);
2464        unix_state_unlock(sk);
2465        sk->sk_state_change(sk);
2466
2467        if (other &&
2468                (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2469
2470                int peer_mode = 0;
2471
2472                if (mode&RCV_SHUTDOWN)
2473                        peer_mode |= SEND_SHUTDOWN;
2474                if (mode&SEND_SHUTDOWN)
2475                        peer_mode |= RCV_SHUTDOWN;
2476                unix_state_lock(other);
2477                other->sk_shutdown |= peer_mode;
2478                unix_state_unlock(other);
2479                other->sk_state_change(other);
2480                if (peer_mode == SHUTDOWN_MASK)
2481                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2482                else if (peer_mode & RCV_SHUTDOWN)
2483                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2484        }
2485        if (other)
2486                sock_put(other);
2487
2488        return 0;
2489}
2490
2491long unix_inq_len(struct sock *sk)
2492{
2493        struct sk_buff *skb;
2494        long amount = 0;
2495
2496        if (sk->sk_state == TCP_LISTEN)
2497                return -EINVAL;
2498
2499        spin_lock(&sk->sk_receive_queue.lock);
2500        if (sk->sk_type == SOCK_STREAM ||
2501            sk->sk_type == SOCK_SEQPACKET) {
2502                skb_queue_walk(&sk->sk_receive_queue, skb)
2503                        amount += unix_skb_len(skb);
2504        } else {
2505                skb = skb_peek(&sk->sk_receive_queue);
2506                if (skb)
2507                        amount = skb->len;
2508        }
2509        spin_unlock(&sk->sk_receive_queue.lock);
2510
2511        return amount;
2512}
2513EXPORT_SYMBOL_GPL(unix_inq_len);
2514
/* Send-side memory currently charged to @sk, per sk_wmem_alloc_get(). */
long unix_outq_len(struct sock *sk)
{
        long charged = sk_wmem_alloc_get(sk);

        return charged;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2520
2521static int unix_open_file(struct sock *sk)
2522{
2523        struct path path;
2524        struct file *f;
2525        int fd;
2526
2527        if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2528                return -EPERM;
2529
2530        if (!smp_load_acquire(&unix_sk(sk)->addr))
2531                return -ENOENT;
2532
2533        path = unix_sk(sk)->path;
2534        if (!path.dentry)
2535                return -ENOENT;
2536
2537        path_get(&path);
2538
2539        fd = get_unused_fd_flags(O_CLOEXEC);
2540        if (fd < 0)
2541                goto out;
2542
2543        f = dentry_open(&path, O_PATH, current_cred());
2544        if (IS_ERR(f)) {
2545                put_unused_fd(fd);
2546                fd = PTR_ERR(f);
2547                goto out;
2548        }
2549
2550        fd_install(fd, f);
2551out:
2552        path_put(&path);
2553
2554        return fd;
2555}
2556
2557static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2558{
2559        struct sock *sk = sock->sk;
2560        long amount = 0;
2561        int err;
2562
2563        switch (cmd) {
2564        case SIOCOUTQ:
2565                amount = unix_outq_len(sk);
2566                err = put_user(amount, (int __user *)arg);
2567                break;
2568        case SIOCINQ:
2569                amount = unix_inq_len(sk);
2570                if (amount < 0)
2571                        err = amount;
2572                else
2573                        err = put_user(amount, (int __user *)arg);
2574                break;
2575        case SIOCUNIXFILE:
2576                err = unix_open_file(sk);
2577                break;
2578        default:
2579                err = -ENOIOCTLCMD;
2580                break;
2581        }
2582        return err;
2583}
2584
2585static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2586{
2587        struct sock *sk = sock->sk;
2588        __poll_t mask;
2589
2590        sock_poll_wait(file, sock, wait);
2591        mask = 0;
2592
2593        /* exceptional events? */
2594        if (sk->sk_err)
2595                mask |= EPOLLERR;
2596        if (sk->sk_shutdown == SHUTDOWN_MASK)
2597                mask |= EPOLLHUP;
2598        if (sk->sk_shutdown & RCV_SHUTDOWN)
2599                mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2600
2601        /* readable? */
2602        if (!skb_queue_empty(&sk->sk_receive_queue))
2603                mask |= EPOLLIN | EPOLLRDNORM;
2604
2605        /* Connection-based need to check for termination and startup */
2606        if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2607            sk->sk_state == TCP_CLOSE)
2608                mask |= EPOLLHUP;
2609
2610        /*
2611         * we set writable also when the other side has shut down the
2612         * connection. This prevents stuck sockets.
2613         */
2614        if (unix_writable(sk))
2615                mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2616
2617        return mask;
2618}
2619
2620static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2621                                    poll_table *wait)
2622{
2623        struct sock *sk = sock->sk, *other;
2624        unsigned int writable;
2625        __poll_t mask;
2626
2627        sock_poll_wait(file, sock, wait);
2628        mask = 0;
2629
2630        /* exceptional events? */
2631        if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2632                mask |= EPOLLERR |
2633                        (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2634
2635        if (sk->sk_shutdown & RCV_SHUTDOWN)
2636                mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2637        if (sk->sk_shutdown == SHUTDOWN_MASK)
2638                mask |= EPOLLHUP;
2639
2640        /* readable? */
2641        if (!skb_queue_empty(&sk->sk_receive_queue))
2642                mask |= EPOLLIN | EPOLLRDNORM;
2643
2644        /* Connection-based need to check for termination and startup */
2645        if (sk->sk_type == SOCK_SEQPACKET) {
2646                if (sk->sk_state == TCP_CLOSE)
2647                        mask |= EPOLLHUP;
2648                /* connection hasn't started yet? */
2649                if (sk->sk_state == TCP_SYN_SENT)
2650                        return mask;
2651        }
2652
2653        /* No write status requested, avoid expensive OUT tests. */
2654        if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2655                return mask;
2656
2657        writable = unix_writable(sk);
2658        if (writable) {
2659                unix_state_lock(sk);
2660
2661                other = unix_peer(sk);
2662                if (other && unix_peer(other) != sk &&
2663                    unix_recvq_full(other) &&
2664                    unix_dgram_peer_wake_me(sk, other))
2665                        writable = 0;
2666
2667                unix_state_unlock(sk);
2668        }
2669
2670        if (writable)
2671                mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2672        else
2673                sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2674
2675        return mask;
2676}
2677
2678#ifdef CONFIG_PROC_FS
2679
/*
 * A seq_file position for the unix socket table packs two fields into one
 * integer: the hash bucket index in the high bits and the offset of the
 * socket inside that bucket in the low BUCKET_SPACE bits.  Offsets are
 * 1-based: the iterator restarts each bucket with offset 1.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Extract the bucket index from a packed position. */
#define get_bucket(x) ((x) >> BUCKET_SPACE)
/* Extract the in-bucket offset from a packed position. */
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
/* Pack bucket index b and in-bucket offset o into one position. */
#define set_bucket_offset(b, o) (((b) << BUCKET_SPACE) | (o))
2685
2686static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2687{
2688        unsigned long offset = get_offset(*pos);
2689        unsigned long bucket = get_bucket(*pos);
2690        struct sock *sk;
2691        unsigned long count = 0;
2692
2693        for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2694                if (sock_net(sk) != seq_file_net(seq))
2695                        continue;
2696                if (++count == offset)
2697                        break;
2698        }
2699
2700        return sk;
2701}
2702
2703static struct sock *unix_next_socket(struct seq_file *seq,
2704                                     struct sock *sk,
2705                                     loff_t *pos)
2706{
2707        unsigned long bucket;
2708
2709        while (sk > (struct sock *)SEQ_START_TOKEN) {
2710                sk = sk_next(sk);
2711                if (!sk)
2712                        goto next_bucket;
2713                if (sock_net(sk) == seq_file_net(seq))
2714                        return sk;
2715        }
2716
2717        do {
2718                sk = unix_from_bucket(seq, pos);
2719                if (sk)
2720                        return sk;
2721
2722next_bucket:
2723                bucket = get_bucket(*pos) + 1;
2724                *pos = set_bucket_offset(bucket, 1);
2725        } while (bucket < ARRAY_SIZE(unix_socket_table));
2726
2727        return NULL;
2728}
2729
2730static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2731        __acquires(unix_table_lock)
2732{
2733        spin_lock(&unix_table_lock);
2734
2735        if (!*pos)
2736                return SEQ_START_TOKEN;
2737
2738        if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2739                return NULL;
2740
2741        return unix_next_socket(seq, NULL, pos);
2742}
2743
2744static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2745{
2746        ++*pos;
2747        return unix_next_socket(seq, v, pos);
2748}
2749
2750static void unix_seq_stop(struct seq_file *seq, void *v)
2751        __releases(unix_table_lock)
2752{
2753        spin_unlock(&unix_table_lock);
2754}
2755
2756static int unix_seq_show(struct seq_file *seq, void *v)
2757{
2758
2759        if (v == SEQ_START_TOKEN)
2760                seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2761                         "Inode Path\n");
2762        else {
2763                struct sock *s = v;
2764                struct unix_sock *u = unix_sk(s);
2765                unix_state_lock(s);
2766
2767                seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2768                        s,
2769                        refcount_read(&s->sk_refcnt),
2770                        0,
2771                        s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2772                        s->sk_type,
2773                        s->sk_socket ?
2774                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2775                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2776                        sock_i_ino(s));
2777
2778                if (u->addr) {  // under unix_table_lock here
2779                        int i, len;
2780                        seq_putc(seq, ' ');
2781
2782                        i = 0;
2783                        len = u->addr->len - sizeof(short);
2784                        if (!UNIX_ABSTRACT(s))
2785                                len--;
2786                        else {
2787                                seq_putc(seq, '@');
2788                                i++;
2789                        }
2790                        for ( ; i < len; i++)
2791                                seq_putc(seq, u->addr->name->sun_path[i] ?:
2792                                         '@');
2793                }
2794                unix_state_unlock(s);
2795                seq_putc(seq, '\n');
2796        }
2797
2798        return 0;
2799}
2800
2801static const struct seq_operations unix_seq_ops = {
2802        .start  = unix_seq_start,
2803        .next   = unix_seq_next,
2804        .stop   = unix_seq_stop,
2805        .show   = unix_seq_show,
2806};
2807#endif
2808
2809static const struct net_proto_family unix_family_ops = {
2810        .family = PF_UNIX,
2811        .create = unix_create,
2812        .owner  = THIS_MODULE,
2813};
2814
2815
2816static int __net_init unix_net_init(struct net *net)
2817{
2818        int error = -ENOMEM;
2819
2820        net->unx.sysctl_max_dgram_qlen = 10;
2821        if (unix_sysctl_register(net))
2822                goto out;
2823
2824#ifdef CONFIG_PROC_FS
2825        if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2826                        sizeof(struct seq_net_private))) {
2827                unix_sysctl_unregister(net);
2828                goto out;
2829        }
2830#endif
2831        error = 0;
2832out:
2833        return error;
2834}
2835
2836static void __net_exit unix_net_exit(struct net *net)
2837{
2838        unix_sysctl_unregister(net);
2839        remove_proc_entry("unix", net->proc_net);
2840}
2841
2842static struct pernet_operations unix_net_ops = {
2843        .init = unix_net_init,
2844        .exit = unix_net_exit,
2845};
2846
2847static int __init af_unix_init(void)
2848{
2849        int rc = -1;
2850
2851        BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2852
2853        rc = proto_register(&unix_proto, 1);
2854        if (rc != 0) {
2855                pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2856                goto out;
2857        }
2858
2859        sock_register(&unix_family_ops);
2860        register_pernet_subsys(&unix_net_ops);
2861out:
2862        return rc;
2863}
2864
2865static void __exit af_unix_exit(void)
2866{
2867        sock_unregister(PF_UNIX);
2868        proto_unregister(&unix_proto);
2869        unregister_pernet_subsys(&unix_net_ops);
2870}
2871
2872/* Earlier than device_initcall() so that other drivers invoking
2873   request_module() don't end up in a loop when modprobe tries
2874   to use a UNIX socket. But later than subsys_initcall() because
2875   we depend on stuff initialised there */
2876fs_initcall(af_unix_init);
2877module_exit(af_unix_exit);
2878
2879MODULE_LICENSE("GPL");
2880MODULE_ALIAS_NETPROTO(PF_UNIX);
2881