linux/net/socket.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * NET          An implementation of the SOCKET network access protocol.
   4 *
   5 * Version:     @(#)socket.c    1.1.93  18/02/95
   6 *
   7 * Authors:     Orest Zborowski, <obz@Kodak.COM>
   8 *              Ross Biro
   9 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  10 *
  11 * Fixes:
  12 *              Anonymous       :       NOTSOCK/BADF cleanup. Error fix in
  13 *                                      shutdown()
  14 *              Alan Cox        :       verify_area() fixes
  15 *              Alan Cox        :       Removed DDI
  16 *              Jonathan Kamens :       SOCK_DGRAM reconnect bug
  17 *              Alan Cox        :       Moved a load of checks to the very
  18 *                                      top level.
  19 *              Alan Cox        :       Move address structures to/from user
  20 *                                      mode above the protocol layers.
  21 *              Rob Janssen     :       Allow 0 length sends.
  22 *              Alan Cox        :       Asynchronous I/O support (cribbed from the
  23 *                                      tty drivers).
  24 *              Niibe Yutaka    :       Asynchronous I/O for writes (4.4BSD style)
  25 *              Jeff Uphoff     :       Made max number of sockets command-line
  26 *                                      configurable.
  27 *              Matti Aarnio    :       Made the number of sockets dynamic,
  28 *                                      to be allocated when needed, and mr.
  29 *                                      Uphoff's max is used as max to be
  30 *                                      allowed to allocate.
  31 *              Linus           :       Argh. removed all the socket allocation
  32 *                                      altogether: it's in the inode now.
  33 *              Alan Cox        :       Made sock_alloc()/sock_release() public
  34 *                                      for NetROM and future kernel nfsd type
  35 *                                      stuff.
  36 *              Alan Cox        :       sendmsg/recvmsg basics.
  37 *              Tom Dyas        :       Export net symbols.
  38 *              Marcin Dalecki  :       Fixed problems with CONFIG_NET="n".
  39 *              Alan Cox        :       Added thread locking to sys_* calls
  40 *                                      for sockets. May have errors at the
  41 *                                      moment.
  42 *              Kevin Buhr      :       Fixed the dumb errors in the above.
  43 *              Andi Kleen      :       Some small cleanups, optimizations,
  44 *                                      and fixed a copy_from_user() bug.
  45 *              Tigran Aivazian :       sys_send(args) calls sys_sendto(args, NULL, 0)
  46 *              Tigran Aivazian :       Made listen(2) backlog sanity checks
  47 *                                      protocol-independent
  48 *
  49 *      This module is effectively the top level interface to the BSD socket
  50 *      paradigm.
  51 *
  52 *      Based upon Swansea University Computer Society NET3.039
  53 */
  54
  55#include <linux/ethtool.h>
  56#include <linux/mm.h>
  57#include <linux/socket.h>
  58#include <linux/file.h>
  59#include <linux/net.h>
  60#include <linux/interrupt.h>
  61#include <linux/thread_info.h>
  62#include <linux/rcupdate.h>
  63#include <linux/netdevice.h>
  64#include <linux/proc_fs.h>
  65#include <linux/seq_file.h>
  66#include <linux/mutex.h>
  67#include <linux/if_bridge.h>
  68#include <linux/if_vlan.h>
  69#include <linux/ptp_classify.h>
  70#include <linux/init.h>
  71#include <linux/poll.h>
  72#include <linux/cache.h>
  73#include <linux/module.h>
  74#include <linux/highmem.h>
  75#include <linux/mount.h>
  76#include <linux/pseudo_fs.h>
  77#include <linux/security.h>
  78#include <linux/syscalls.h>
  79#include <linux/compat.h>
  80#include <linux/kmod.h>
  81#include <linux/audit.h>
  82#include <linux/wireless.h>
  83#include <linux/nsproxy.h>
  84#include <linux/magic.h>
  85#include <linux/slab.h>
  86#include <linux/xattr.h>
  87#include <linux/nospec.h>
  88#include <linux/indirect_call_wrapper.h>
  89
  90#include <linux/uaccess.h>
  91#include <asm/unistd.h>
  92
  93#include <net/compat.h>
  94#include <net/wext.h>
  95#include <net/cls_cgroup.h>
  96
  97#include <net/sock.h>
  98#include <linux/netfilter.h>
  99
 100#include <linux/if_tun.h>
 101#include <linux/ipv6_route.h>
 102#include <linux/route.h>
 103#include <linux/termios.h>
 104#include <linux/sockios.h>
 105#include <net/busy_poll.h>
 106#include <linux/errqueue.h>
 107#include <linux/ptp_clock_kernel.h>
 108
#ifdef CONFIG_NET_RX_BUSY_POLL
/*
 * Busy-poll tunables. They only exist when CONFIG_NET_RX_BUSY_POLL is set.
 * NOTE(review): presumably backed by the net.core.busy_read/busy_poll
 * sysctls and expressed in microseconds - confirm against the sysctl table.
 */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
 113
 114static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
 115static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
 116static int sock_mmap(struct file *file, struct vm_area_struct *vma);
 117
 118static int sock_close(struct inode *inode, struct file *file);
 119static __poll_t sock_poll(struct file *file,
 120                              struct poll_table_struct *wait);
 121static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 122#ifdef CONFIG_COMPAT
 123static long compat_sock_ioctl(struct file *file,
 124                              unsigned int cmd, unsigned long arg);
 125#endif
 126static int sock_fasync(int fd, struct file *filp, int on);
 127static ssize_t sock_sendpage(struct file *file, struct page *page,
 128                             int offset, size_t size, loff_t *ppos, int more);
 129static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
 130                                struct pipe_inode_info *pipe, size_t len,
 131                                unsigned int flags);
 132
 133#ifdef CONFIG_PROC_FS
 134static void sock_show_fdinfo(struct seq_file *m, struct file *f)
 135{
 136        struct socket *sock = f->private_data;
 137
 138        if (sock->ops->show_fdinfo)
 139                sock->ops->show_fdinfo(m, sock);
 140}
 141#else
 142#define sock_show_fdinfo NULL
 143#endif
 144
 145/*
 146 *      Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
 147 *      in the operation structures but are done directly via the socketcall() multiplexor.
 148 */
 149
 150static const struct file_operations socket_file_ops = {
 151        .owner =        THIS_MODULE,
 152        .llseek =       no_llseek,
 153        .read_iter =    sock_read_iter,
 154        .write_iter =   sock_write_iter,
 155        .poll =         sock_poll,
 156        .unlocked_ioctl = sock_ioctl,
 157#ifdef CONFIG_COMPAT
 158        .compat_ioctl = compat_sock_ioctl,
 159#endif
 160        .mmap =         sock_mmap,
 161        .release =      sock_close,
 162        .fasync =       sock_fasync,
 163        .sendpage =     sock_sendpage,
 164        .splice_write = generic_splice_sendpage,
 165        .splice_read =  sock_splice_read,
 166        .show_fdinfo =  sock_show_fdinfo,
 167};
 168
 169static const char * const pf_family_names[] = {
 170        [PF_UNSPEC]     = "PF_UNSPEC",
 171        [PF_UNIX]       = "PF_UNIX/PF_LOCAL",
 172        [PF_INET]       = "PF_INET",
 173        [PF_AX25]       = "PF_AX25",
 174        [PF_IPX]        = "PF_IPX",
 175        [PF_APPLETALK]  = "PF_APPLETALK",
 176        [PF_NETROM]     = "PF_NETROM",
 177        [PF_BRIDGE]     = "PF_BRIDGE",
 178        [PF_ATMPVC]     = "PF_ATMPVC",
 179        [PF_X25]        = "PF_X25",
 180        [PF_INET6]      = "PF_INET6",
 181        [PF_ROSE]       = "PF_ROSE",
 182        [PF_DECnet]     = "PF_DECnet",
 183        [PF_NETBEUI]    = "PF_NETBEUI",
 184        [PF_SECURITY]   = "PF_SECURITY",
 185        [PF_KEY]        = "PF_KEY",
 186        [PF_NETLINK]    = "PF_NETLINK/PF_ROUTE",
 187        [PF_PACKET]     = "PF_PACKET",
 188        [PF_ASH]        = "PF_ASH",
 189        [PF_ECONET]     = "PF_ECONET",
 190        [PF_ATMSVC]     = "PF_ATMSVC",
 191        [PF_RDS]        = "PF_RDS",
 192        [PF_SNA]        = "PF_SNA",
 193        [PF_IRDA]       = "PF_IRDA",
 194        [PF_PPPOX]      = "PF_PPPOX",
 195        [PF_WANPIPE]    = "PF_WANPIPE",
 196        [PF_LLC]        = "PF_LLC",
 197        [PF_IB]         = "PF_IB",
 198        [PF_MPLS]       = "PF_MPLS",
 199        [PF_CAN]        = "PF_CAN",
 200        [PF_TIPC]       = "PF_TIPC",
 201        [PF_BLUETOOTH]  = "PF_BLUETOOTH",
 202        [PF_IUCV]       = "PF_IUCV",
 203        [PF_RXRPC]      = "PF_RXRPC",
 204        [PF_ISDN]       = "PF_ISDN",
 205        [PF_PHONET]     = "PF_PHONET",
 206        [PF_IEEE802154] = "PF_IEEE802154",
 207        [PF_CAIF]       = "PF_CAIF",
 208        [PF_ALG]        = "PF_ALG",
 209        [PF_NFC]        = "PF_NFC",
 210        [PF_VSOCK]      = "PF_VSOCK",
 211        [PF_KCM]        = "PF_KCM",
 212        [PF_QIPCRTR]    = "PF_QIPCRTR",
 213        [PF_SMC]        = "PF_SMC",
 214        [PF_XDP]        = "PF_XDP",
 215        [PF_MCTP]       = "PF_MCTP",
 216};
 217
/*
 *	The protocol list. Each protocol is registered in here.
 *
 *	net_families[] has NPROTO slots and its entries are __rcu-annotated
 *	pointers.
 *	NOTE(review): presumably indexed by protocol family number, with
 *	net_family_lock serialising updates and readers using RCU - confirm
 *	against the registration/lookup code, which is outside this view.
 */

static DEFINE_SPINLOCK(net_family_lock);
static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
 224
 225/*
 226 * Support routines.
 227 * Move socket addresses back and forth across the kernel/user
 228 * divide and look after the messy bits.
 229 */
 230
 231/**
 232 *      move_addr_to_kernel     -       copy a socket address into kernel space
 233 *      @uaddr: Address in user space
 234 *      @kaddr: Address in kernel space
 235 *      @ulen: Length in user space
 236 *
 237 *      The address is copied into kernel space. If the provided address is
 238 *      too long an error code of -EINVAL is returned. If the copy gives
 239 *      invalid addresses -EFAULT is returned. On a success 0 is returned.
 240 */
 241
 242int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
 243{
 244        if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
 245                return -EINVAL;
 246        if (ulen == 0)
 247                return 0;
 248        if (copy_from_user(kaddr, uaddr, ulen))
 249                return -EFAULT;
 250        return audit_sockaddr(ulen, kaddr);
 251}
 252
 253/**
 254 *      move_addr_to_user       -       copy an address to user space
 255 *      @kaddr: kernel space address
 256 *      @klen: length of address in kernel
 257 *      @uaddr: user space address
 258 *      @ulen: pointer to user length field
 259 *
 260 *      The value pointed to by ulen on entry is the buffer length available.
 261 *      This is overwritten with the buffer space used. -EINVAL is returned
 262 *      if an overlong buffer is specified or a negative buffer size. -EFAULT
 263 *      is returned if either the buffer or the length field are not
 264 *      accessible.
 265 *      After copying the data up to the limit the user specifies, the true
 266 *      length of the data is written over the length limit the user
 267 *      specified. Zero is returned for a success.
 268 */
 269
 270static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
 271                             void __user *uaddr, int __user *ulen)
 272{
 273        int err;
 274        int len;
 275
 276        BUG_ON(klen > sizeof(struct sockaddr_storage));
 277        err = get_user(len, ulen);
 278        if (err)
 279                return err;
 280        if (len > klen)
 281                len = klen;
 282        if (len < 0)
 283                return -EINVAL;
 284        if (len) {
 285                if (audit_sockaddr(klen, kaddr))
 286                        return -ENOMEM;
 287                if (copy_to_user(uaddr, kaddr, len))
 288                        return -EFAULT;
 289        }
 290        /*
 291         *      "fromlen shall refer to the value before truncation.."
 292         *                      1003.1g
 293         */
 294        return __put_user(klen, ulen);
 295}
 296
/*
 * Slab cache for struct socket_alloc objects; created by init_inodecache()
 * and used by sock_alloc_inode()/sock_free_inode().
 */
static struct kmem_cache *sock_inode_cachep __ro_after_init;
 298
 299static struct inode *sock_alloc_inode(struct super_block *sb)
 300{
 301        struct socket_alloc *ei;
 302
 303        ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
 304        if (!ei)
 305                return NULL;
 306        init_waitqueue_head(&ei->socket.wq.wait);
 307        ei->socket.wq.fasync_list = NULL;
 308        ei->socket.wq.flags = 0;
 309
 310        ei->socket.state = SS_UNCONNECTED;
 311        ei->socket.flags = 0;
 312        ei->socket.ops = NULL;
 313        ei->socket.sk = NULL;
 314        ei->socket.file = NULL;
 315
 316        return &ei->vfs_inode;
 317}
 318
 319static void sock_free_inode(struct inode *inode)
 320{
 321        struct socket_alloc *ei;
 322
 323        ei = container_of(inode, struct socket_alloc, vfs_inode);
 324        kmem_cache_free(sock_inode_cachep, ei);
 325}
 326
 327static void init_once(void *foo)
 328{
 329        struct socket_alloc *ei = (struct socket_alloc *)foo;
 330
 331        inode_init_once(&ei->vfs_inode);
 332}
 333
 334static void init_inodecache(void)
 335{
 336        sock_inode_cachep = kmem_cache_create("sock_inode_cache",
 337                                              sizeof(struct socket_alloc),
 338                                              0,
 339                                              (SLAB_HWCACHE_ALIGN |
 340                                               SLAB_RECLAIM_ACCOUNT |
 341                                               SLAB_MEM_SPREAD | SLAB_ACCOUNT),
 342                                              init_once);
 343        BUG_ON(sock_inode_cachep == NULL);
 344}
 345
 346static const struct super_operations sockfs_ops = {
 347        .alloc_inode    = sock_alloc_inode,
 348        .free_inode     = sock_free_inode,
 349        .statfs         = simple_statfs,
 350};
 351
 352/*
 353 * sockfs_dname() is called from d_path().
 354 */
 355static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
 356{
 357        return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
 358                                d_inode(dentry)->i_ino);
 359}
 360
 361static const struct dentry_operations sockfs_dentry_operations = {
 362        .d_dname  = sockfs_dname,
 363};
 364
 365static int sockfs_xattr_get(const struct xattr_handler *handler,
 366                            struct dentry *dentry, struct inode *inode,
 367                            const char *suffix, void *value, size_t size)
 368{
 369        if (value) {
 370                if (dentry->d_name.len + 1 > size)
 371                        return -ERANGE;
 372                memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
 373        }
 374        return dentry->d_name.len + 1;
 375}
 376
/*
 * Name of the socket-protocol-name xattr: the system prefix followed by
 * "sockprotoname". The _LEN variant excludes the terminating NUL.
 */
#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)

/*
 * Handler for that xattr. It matches the exact name and only provides
 * ->get (sockfs_xattr_get); no ->set is installed.
 */
static const struct xattr_handler sockfs_xattr_handler = {
	.name = XATTR_NAME_SOCKPROTONAME,
	.get = sockfs_xattr_get,
};
 385
 386static int sockfs_security_xattr_set(const struct xattr_handler *handler,
 387                                     struct user_namespace *mnt_userns,
 388                                     struct dentry *dentry, struct inode *inode,
 389                                     const char *suffix, const void *value,
 390                                     size_t size, int flags)
 391{
 392        /* Handled by LSM. */
 393        return -EAGAIN;
 394}
 395
 396static const struct xattr_handler sockfs_security_xattr_handler = {
 397        .prefix = XATTR_SECURITY_PREFIX,
 398        .set = sockfs_security_xattr_set,
 399};
 400
 401static const struct xattr_handler *sockfs_xattr_handlers[] = {
 402        &sockfs_xattr_handler,
 403        &sockfs_security_xattr_handler,
 404        NULL
 405};
 406
 407static int sockfs_init_fs_context(struct fs_context *fc)
 408{
 409        struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
 410        if (!ctx)
 411                return -ENOMEM;
 412        ctx->ops = &sockfs_ops;
 413        ctx->dops = &sockfs_dentry_operations;
 414        ctx->xattr = sockfs_xattr_handlers;
 415        return 0;
 416}
 417
/*
 * Mount used for socket inodes and files: sock_alloc() allocates inodes
 * from sock_mnt->mnt_sb and sock_alloc_file() creates files on it.
 * NOTE(review): presumably the internal sockfs mount set up at init time -
 * the assignment is outside this view.
 */
static struct vfsmount *sock_mnt __read_mostly;
 419
 420static struct file_system_type sock_fs_type = {
 421        .name =         "sockfs",
 422        .init_fs_context = sockfs_init_fs_context,
 423        .kill_sb =      kill_anon_super,
 424};
 425
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them into the fd space
 *	of the current process. On success the file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we no longer
 *	refer to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY BE invalid!
 *	This race condition is unavoidable with shared fd spaces; we cannot
 *	solve it inside the kernel, but we still take care of internal
 *	coherence.
 */
 442
 443/**
 444 *      sock_alloc_file - Bind a &socket to a &file
 445 *      @sock: socket
 446 *      @flags: file status flags
 447 *      @dname: protocol name
 448 *
 449 *      Returns the &file bound with @sock, implicitly storing it
 450 *      in sock->file. If dname is %NULL, sets to "".
 451 *      On failure the return is a ERR pointer (see linux/err.h).
 452 *      This function uses GFP_KERNEL internally.
 453 */
 454
 455struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 456{
 457        struct file *file;
 458
 459        if (!dname)
 460                dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
 461
 462        file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
 463                                O_RDWR | (flags & O_NONBLOCK),
 464                                &socket_file_ops);
 465        if (IS_ERR(file)) {
 466                sock_release(sock);
 467                return file;
 468        }
 469
 470        sock->file = file;
 471        file->private_data = sock;
 472        stream_open(SOCK_INODE(sock), file);
 473        return file;
 474}
 475EXPORT_SYMBOL(sock_alloc_file);
 476
 477static int sock_map_fd(struct socket *sock, int flags)
 478{
 479        struct file *newfile;
 480        int fd = get_unused_fd_flags(flags);
 481        if (unlikely(fd < 0)) {
 482                sock_release(sock);
 483                return fd;
 484        }
 485
 486        newfile = sock_alloc_file(sock, flags, NULL);
 487        if (!IS_ERR(newfile)) {
 488                fd_install(fd, newfile);
 489                return fd;
 490        }
 491
 492        put_unused_fd(fd);
 493        return PTR_ERR(newfile);
 494}
 495
 496/**
 497 *      sock_from_file - Return the &socket bounded to @file.
 498 *      @file: file
 499 *
 500 *      On failure returns %NULL.
 501 */
 502
 503struct socket *sock_from_file(struct file *file)
 504{
 505        if (file->f_op == &socket_file_ops)
 506                return file->private_data;      /* set in sock_map_fd */
 507
 508        return NULL;
 509}
 510EXPORT_SYMBOL(sock_from_file);
 511
 512/**
 513 *      sockfd_lookup - Go from a file number to its socket slot
 514 *      @fd: file handle
 515 *      @err: pointer to an error code return
 516 *
 517 *      The file handle passed in is locked and the socket it is bound
 518 *      to is returned. If an error occurs the err pointer is overwritten
 519 *      with a negative errno code and NULL is returned. The function checks
 520 *      for both invalid handles and passing a handle which is not a socket.
 521 *
 522 *      On a success the socket object pointer is returned.
 523 */
 524
 525struct socket *sockfd_lookup(int fd, int *err)
 526{
 527        struct file *file;
 528        struct socket *sock;
 529
 530        file = fget(fd);
 531        if (!file) {
 532                *err = -EBADF;
 533                return NULL;
 534        }
 535
 536        sock = sock_from_file(file);
 537        if (!sock) {
 538                *err = -ENOTSOCK;
 539                fput(file);
 540        }
 541        return sock;
 542}
 543EXPORT_SYMBOL(sockfd_lookup);
 544
 545static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
 546{
 547        struct fd f = fdget(fd);
 548        struct socket *sock;
 549
 550        *err = -EBADF;
 551        if (f.file) {
 552                sock = sock_from_file(f.file);
 553                if (likely(sock)) {
 554                        *fput_needed = f.flags & FDPUT_FPUT;
 555                        return sock;
 556                }
 557                *err = -ENOTSOCK;
 558                fdput(f);
 559        }
 560        return NULL;
 561}
 562
 563static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
 564                                size_t size)
 565{
 566        ssize_t len;
 567        ssize_t used = 0;
 568
 569        len = security_inode_listsecurity(d_inode(dentry), buffer, size);
 570        if (len < 0)
 571                return len;
 572        used += len;
 573        if (buffer) {
 574                if (size < used)
 575                        return -ERANGE;
 576                buffer += len;
 577        }
 578
 579        len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
 580        used += len;
 581        if (buffer) {
 582                if (size < used)
 583                        return -ERANGE;
 584                memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
 585                buffer += len;
 586        }
 587
 588        return used;
 589}
 590
 591static int sockfs_setattr(struct user_namespace *mnt_userns,
 592                          struct dentry *dentry, struct iattr *iattr)
 593{
 594        int err = simple_setattr(&init_user_ns, dentry, iattr);
 595
 596        if (!err && (iattr->ia_valid & ATTR_UID)) {
 597                struct socket *sock = SOCKET_I(d_inode(dentry));
 598
 599                if (sock->sk)
 600                        sock->sk->sk_uid = iattr->ia_uid;
 601                else
 602                        err = -ENOENT;
 603        }
 604
 605        return err;
 606}
 607
 608static const struct inode_operations sockfs_inode_ops = {
 609        .listxattr = sockfs_listxattr,
 610        .setattr = sockfs_setattr,
 611};
 612
 613/**
 614 *      sock_alloc - allocate a socket
 615 *
 616 *      Allocate a new inode and socket object. The two are bound together
 617 *      and initialised. The socket is then returned. If we are out of inodes
 618 *      NULL is returned. This functions uses GFP_KERNEL internally.
 619 */
 620
 621struct socket *sock_alloc(void)
 622{
 623        struct inode *inode;
 624        struct socket *sock;
 625
 626        inode = new_inode_pseudo(sock_mnt->mnt_sb);
 627        if (!inode)
 628                return NULL;
 629
 630        sock = SOCKET_I(inode);
 631
 632        inode->i_ino = get_next_ino();
 633        inode->i_mode = S_IFSOCK | S_IRWXUGO;
 634        inode->i_uid = current_fsuid();
 635        inode->i_gid = current_fsgid();
 636        inode->i_op = &sockfs_inode_ops;
 637
 638        return sock;
 639}
 640EXPORT_SYMBOL(sock_alloc);
 641
 642static void __sock_release(struct socket *sock, struct inode *inode)
 643{
 644        if (sock->ops) {
 645                struct module *owner = sock->ops->owner;
 646
 647                if (inode)
 648                        inode_lock(inode);
 649                sock->ops->release(sock);
 650                sock->sk = NULL;
 651                if (inode)
 652                        inode_unlock(inode);
 653                sock->ops = NULL;
 654                module_put(owner);
 655        }
 656
 657        if (sock->wq.fasync_list)
 658                pr_err("%s: fasync list not empty!\n", __func__);
 659
 660        if (!sock->file) {
 661                iput(SOCK_INODE(sock));
 662                return;
 663        }
 664        sock->file = NULL;
 665}
 666
 667/**
 668 *      sock_release - close a socket
 669 *      @sock: socket to close
 670 *
 671 *      The socket is released from the protocol stack if it has a release
 672 *      callback, and the inode is then released if the socket is bound to
 673 *      an inode not a file.
 674 */
 675void sock_release(struct socket *sock)
 676{
 677        __sock_release(sock, NULL);
 678}
 679EXPORT_SYMBOL(sock_release);
 680
 681void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
 682{
 683        u8 flags = *tx_flags;
 684
 685        if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
 686                flags |= SKBTX_HW_TSTAMP;
 687
 688        if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
 689                flags |= SKBTX_SW_TSTAMP;
 690
 691        if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
 692                flags |= SKBTX_SCHED_TSTAMP;
 693
 694        *tx_flags = flags;
 695}
 696EXPORT_SYMBOL(__sock_tx_timestamp);
 697
 698INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
 699                                           size_t));
 700INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
 701                                            size_t));
 702static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
 703{
 704        int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
 705                                     inet_sendmsg, sock, msg,
 706                                     msg_data_left(msg));
 707        BUG_ON(ret == -EIOCBQUEUED);
 708        return ret;
 709}
 710
 711/**
 712 *      sock_sendmsg - send a message through @sock
 713 *      @sock: socket
 714 *      @msg: message to send
 715 *
 716 *      Sends @msg through @sock, passing through LSM.
 717 *      Returns the number of bytes sent, or an error code.
 718 */
 719int sock_sendmsg(struct socket *sock, struct msghdr *msg)
 720{
 721        int err = security_socket_sendmsg(sock, msg,
 722                                          msg_data_left(msg));
 723
 724        return err ?: sock_sendmsg_nosec(sock, msg);
 725}
 726EXPORT_SYMBOL(sock_sendmsg);
 727
 728/**
 729 *      kernel_sendmsg - send a message through @sock (kernel-space)
 730 *      @sock: socket
 731 *      @msg: message header
 732 *      @vec: kernel vec
 733 *      @num: vec array length
 734 *      @size: total message data size
 735 *
 736 *      Builds the message data with @vec and sends it through @sock.
 737 *      Returns the number of bytes sent, or an error code.
 738 */
 739
 740int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 741                   struct kvec *vec, size_t num, size_t size)
 742{
 743        iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
 744        return sock_sendmsg(sock, msg);
 745}
 746EXPORT_SYMBOL(kernel_sendmsg);
 747
 748/**
 749 *      kernel_sendmsg_locked - send a message through @sock (kernel-space)
 750 *      @sk: sock
 751 *      @msg: message header
 752 *      @vec: output s/g array
 753 *      @num: output s/g array length
 754 *      @size: total message data size
 755 *
 756 *      Builds the message data with @vec and sends it through @sock.
 757 *      Returns the number of bytes sent, or an error code.
 758 *      Caller must hold @sk.
 759 */
 760
 761int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
 762                          struct kvec *vec, size_t num, size_t size)
 763{
 764        struct socket *sock = sk->sk_socket;
 765
 766        if (!sock->ops->sendmsg_locked)
 767                return sock_no_sendmsg_locked(sk, msg, size);
 768
 769        iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
 770
 771        return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
 772}
 773EXPORT_SYMBOL(kernel_sendmsg_locked);
 774
 775static bool skb_is_err_queue(const struct sk_buff *skb)
 776{
 777        /* pkt_type of skbs enqueued on the error queue are set to
 778         * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
 779         * in recvmsg, since skbs received on a local socket will never
 780         * have a pkt_type of PACKET_OUTGOING.
 781         */
 782        return skb->pkt_type == PACKET_OUTGOING;
 783}
 784
 785/* On transmit, software and hardware timestamps are returned independently.
 786 * As the two skb clones share the hardware timestamp, which may be updated
 787 * before the software timestamp is received, a hardware TX timestamp may be
 788 * returned only if there is no software TX timestamp. Ignore false software
 789 * timestamps, which may be made in the __sock_recv_timestamp() call when the
 790 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
 791 * hardware timestamp.
 792 */
 793static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
 794{
 795        return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
 796}
 797
 798static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
 799{
 800        struct scm_ts_pktinfo ts_pktinfo;
 801        struct net_device *orig_dev;
 802
 803        if (!skb_mac_header_was_set(skb))
 804                return;
 805
 806        memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
 807
 808        rcu_read_lock();
 809        orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
 810        if (orig_dev)
 811                ts_pktinfo.if_index = orig_dev->ifindex;
 812        rcu_read_unlock();
 813
 814        ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
 815        put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
 816                 sizeof(ts_pktinfo), &ts_pktinfo);
 817}
 818
 819/*
 820 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
 821 */
 822void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 823        struct sk_buff *skb)
 824{
 825        int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
 826        int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
 827        struct scm_timestamping_internal tss;
 828
 829        int empty = 1, false_tstamp = 0;
 830        struct skb_shared_hwtstamps *shhwtstamps =
 831                skb_hwtstamps(skb);
 832
 833        /* Race occurred between timestamp enabling and packet
 834           receiving.  Fill in the current time for now. */
 835        if (need_software_tstamp && skb->tstamp == 0) {
 836                __net_timestamp(skb);
 837                false_tstamp = 1;
 838        }
 839
 840        if (need_software_tstamp) {
 841                if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
 842                        if (new_tstamp) {
 843                                struct __kernel_sock_timeval tv;
 844
 845                                skb_get_new_timestamp(skb, &tv);
 846                                put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
 847                                         sizeof(tv), &tv);
 848                        } else {
 849                                struct __kernel_old_timeval tv;
 850
 851                                skb_get_timestamp(skb, &tv);
 852                                put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
 853                                         sizeof(tv), &tv);
 854                        }
 855                } else {
 856                        if (new_tstamp) {
 857                                struct __kernel_timespec ts;
 858
 859                                skb_get_new_timestampns(skb, &ts);
 860                                put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
 861                                         sizeof(ts), &ts);
 862                        } else {
 863                                struct __kernel_old_timespec ts;
 864
 865                                skb_get_timestampns(skb, &ts);
 866                                put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
 867                                         sizeof(ts), &ts);
 868                        }
 869                }
 870        }
 871
 872        memset(&tss, 0, sizeof(tss));
 873        if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
 874            ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
 875                empty = 0;
 876        if (shhwtstamps &&
 877            (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
 878            !skb_is_swtx_tstamp(skb, false_tstamp)) {
 879                if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
 880                        ptp_convert_timestamp(shhwtstamps, sk->sk_bind_phc);
 881
 882                if (ktime_to_timespec64_cond(shhwtstamps->hwtstamp,
 883                                             tss.ts + 2)) {
 884                        empty = 0;
 885
 886                        if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
 887                            !skb_is_err_queue(skb))
 888                                put_ts_pktinfo(msg, skb);
 889                }
 890        }
 891        if (!empty) {
 892                if (sock_flag(sk, SOCK_TSTAMP_NEW))
 893                        put_cmsg_scm_timestamping64(msg, &tss);
 894                else
 895                        put_cmsg_scm_timestamping(msg, &tss);
 896
 897                if (skb_is_err_queue(skb) && skb->len &&
 898                    SKB_EXT_ERR(skb)->opt_stats)
 899                        put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
 900                                 skb->len, skb->data);
 901        }
 902}
 903EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
 904
 905void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
 906        struct sk_buff *skb)
 907{
 908        int ack;
 909
 910        if (!sock_flag(sk, SOCK_WIFI_STATUS))
 911                return;
 912        if (!skb->wifi_acked_valid)
 913                return;
 914
 915        ack = skb->wifi_acked;
 916
 917        put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
 918}
 919EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
 920
 921static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
 922                                   struct sk_buff *skb)
 923{
 924        if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
 925                put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
 926                        sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
 927}
 928
/*
 * Run both receive-side cmsg helpers for @skb: sock_recv_timestamp()
 * first, then sock_recv_drops(), each appending to @msg as it sees fit.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
        struct sk_buff *skb)
{
        sock_recv_timestamp(msg, sk, skb);
        sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
 936
 937INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
 938                                           size_t, int));
 939INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
 940                                            size_t, int));
 941static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
 942                                     int flags)
 943{
 944        return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
 945                                  inet_recvmsg, sock, msg, msg_data_left(msg),
 946                                  flags);
 947}
 948
 949/**
 950 *      sock_recvmsg - receive a message from @sock
 951 *      @sock: socket
 952 *      @msg: message to receive
 953 *      @flags: message flags
 954 *
 955 *      Receives @msg from @sock, passing through LSM. Returns the total number
 956 *      of bytes received, or an error.
 957 */
 958int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
 959{
 960        int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
 961
 962        return err ?: sock_recvmsg_nosec(sock, msg, flags);
 963}
 964EXPORT_SYMBOL(sock_recvmsg);
 965
 966/**
 967 *      kernel_recvmsg - Receive a message from a socket (kernel space)
 968 *      @sock: The socket to receive the message from
 969 *      @msg: Received message
 970 *      @vec: Input s/g array for message data
 971 *      @num: Size of input s/g array
 972 *      @size: Number of bytes to read
 973 *      @flags: Message flags (MSG_DONTWAIT, etc...)
 974 *
 975 *      On return the msg structure contains the scatter/gather array passed in the
 976 *      vec argument. The array is modified so that it consists of the unfilled
 977 *      portion of the original array.
 978 *
 979 *      The returned value is the total number of bytes received, or an error.
 980 */
 981
 982int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 983                   struct kvec *vec, size_t num, size_t size, int flags)
 984{
 985        msg->msg_control_is_user = false;
 986        iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
 987        return sock_recvmsg(sock, msg, flags);
 988}
 989EXPORT_SYMBOL(kernel_recvmsg);
 990
 991static ssize_t sock_sendpage(struct file *file, struct page *page,
 992                             int offset, size_t size, loff_t *ppos, int more)
 993{
 994        struct socket *sock;
 995        int flags;
 996
 997        sock = file->private_data;
 998
 999        flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
1000        /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
1001        flags |= more;
1002
1003        return kernel_sendpage(sock, page, offset, size, flags);
1004}
1005
1006static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
1007                                struct pipe_inode_info *pipe, size_t len,
1008                                unsigned int flags)
1009{
1010        struct socket *sock = file->private_data;
1011
1012        if (unlikely(!sock->ops->splice_read))
1013                return generic_file_splice_read(file, ppos, pipe, len, flags);
1014
1015        return sock->ops->splice_read(sock, ppos, pipe, len, flags);
1016}
1017
1018static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
1019{
1020        struct file *file = iocb->ki_filp;
1021        struct socket *sock = file->private_data;
1022        struct msghdr msg = {.msg_iter = *to,
1023                             .msg_iocb = iocb};
1024        ssize_t res;
1025
1026        if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
1027                msg.msg_flags = MSG_DONTWAIT;
1028
1029        if (iocb->ki_pos != 0)
1030                return -ESPIPE;
1031
1032        if (!iov_iter_count(to))        /* Match SYS5 behaviour */
1033                return 0;
1034
1035        res = sock_recvmsg(sock, &msg, msg.msg_flags);
1036        *to = msg.msg_iter;
1037        return res;
1038}
1039
1040static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
1041{
1042        struct file *file = iocb->ki_filp;
1043        struct socket *sock = file->private_data;
1044        struct msghdr msg = {.msg_iter = *from,
1045                             .msg_iocb = iocb};
1046        ssize_t res;
1047
1048        if (iocb->ki_pos != 0)
1049                return -ESPIPE;
1050
1051        if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
1052                msg.msg_flags = MSG_DONTWAIT;
1053
1054        if (sock->type == SOCK_SEQPACKET)
1055                msg.msg_flags |= MSG_EOR;
1056
1057        res = sock_sendmsg(sock, &msg);
1058        *from = msg.msg_iter;
1059        return res;
1060}
1061
/*
 * Atomic setting of ioctl hooks to avoid race
 * with module unload.
 */

/* Serialises updates to br_ioctl_hook and calls made through it. */
static DEFINE_MUTEX(br_ioctl_mutex);
/* Bridge ioctl handler; NULL while no handler is registered. */
static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br,
                            unsigned int cmd, struct ifreq *ifr,
                            void __user *uarg);

/*
 * Install @hook as the bridge ioctl handler (or clear it by passing NULL).
 * The store happens under br_ioctl_mutex, the same mutex br_ioctl_call()
 * holds while invoking the hook.
 */
void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br,
                             unsigned int cmd, struct ifreq *ifr,
                             void __user *uarg))
{
        mutex_lock(&br_ioctl_mutex);
        br_ioctl_hook = hook;
        mutex_unlock(&br_ioctl_mutex);
}
EXPORT_SYMBOL(brioctl_set);
1081
1082int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
1083                  struct ifreq *ifr, void __user *uarg)
1084{
1085        int err = -ENOPKG;
1086
1087        if (!br_ioctl_hook)
1088                request_module("bridge");
1089
1090        mutex_lock(&br_ioctl_mutex);
1091        if (br_ioctl_hook)
1092                err = br_ioctl_hook(net, br, cmd, ifr, uarg);
1093        mutex_unlock(&br_ioctl_mutex);
1094
1095        return err;
1096}
1097
/* Serialises updates to vlan_ioctl_hook and calls made through it. */
static DEFINE_MUTEX(vlan_ioctl_mutex);
/* VLAN ioctl handler; NULL while no handler is registered. */
static int (*vlan_ioctl_hook) (struct net *, void __user *arg);

/*
 * Install @hook as the VLAN ioctl handler (or clear it by passing NULL).
 * The store happens under vlan_ioctl_mutex.
 */
void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
{
        mutex_lock(&vlan_ioctl_mutex);
        vlan_ioctl_hook = hook;
        mutex_unlock(&vlan_ioctl_mutex);
}
EXPORT_SYMBOL(vlan_ioctl_set);
1108
1109static long sock_do_ioctl(struct net *net, struct socket *sock,
1110                          unsigned int cmd, unsigned long arg)
1111{
1112        struct ifreq ifr;
1113        bool need_copyout;
1114        int err;
1115        void __user *argp = (void __user *)arg;
1116        void __user *data;
1117
1118        err = sock->ops->ioctl(sock, cmd, arg);
1119
1120        /*
1121         * If this ioctl is unknown try to hand it down
1122         * to the NIC driver.
1123         */
1124        if (err != -ENOIOCTLCMD)
1125                return err;
1126
1127        if (!is_socket_ioctl_cmd(cmd))
1128                return -ENOTTY;
1129
1130        if (get_user_ifreq(&ifr, &data, argp))
1131                return -EFAULT;
1132        err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
1133        if (!err && need_copyout)
1134                if (put_user_ifreq(&ifr, argp))
1135                        return -EFAULT;
1136
1137        return err;
1138}
1139
/*
 *      With an ioctl, arg may well be a user mode pointer, but we don't know
 *      what to do with it - that's up to the protocol still.
 *
 *      Top-level ioctl handler for socket files. Commands are tried in this
 *      order: the SIOCDEVPRIVATE range (sent straight to dev_ioctl()), the
 *      wireless-extension range when CONFIG_WEXT_CORE is enabled, a set of
 *      commands handled right here, and finally sock_do_ioctl() for
 *      everything else.
 */

static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
        struct socket *sock;
        struct sock *sk;
        void __user *argp = (void __user *)arg;
        int pid, err;
        struct net *net;

        sock = file->private_data;
        sk = sock->sk;
        net = sock_net(sk);
        /* Device-private commands: SIOCDEVPRIVATE .. SIOCDEVPRIVATE + 15. */
        if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
                struct ifreq ifr;
                void __user *data;
                bool need_copyout;
                if (get_user_ifreq(&ifr, &data, argp))
                        return -EFAULT;
                err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
                if (!err && need_copyout)
                        if (put_user_ifreq(&ifr, argp))
                                return -EFAULT;
        } else
#ifdef CONFIG_WEXT_CORE
        if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
                err = wext_handle_ioctl(net, cmd, argp);
        } else
#endif
                /* The switch is the body of whichever "else" precedes it. */
                switch (cmd) {
                case FIOSETOWN:
                case SIOCSPGRP:
                        /* Set the owner from an int read from user space. */
                        err = -EFAULT;
                        if (get_user(pid, (int __user *)argp))
                                break;
                        err = f_setown(sock->file, pid, 1);
                        break;
                case FIOGETOWN:
                case SIOCGPGRP:
                        err = put_user(f_getown(sock->file),
                                       (int __user *)argp);
                        break;
                case SIOCGIFBR:
                case SIOCSIFBR:
                case SIOCBRADDBR:
                case SIOCBRDELBR:
                        err = br_ioctl_call(net, NULL, cmd, NULL, argp);
                        break;
                case SIOCGIFVLAN:
                case SIOCSIFVLAN:
                        /* -ENOPKG unless a VLAN hook ends up registered. */
                        err = -ENOPKG;
                        if (!vlan_ioctl_hook)
                                request_module("8021q");

                        /* Re-check and call under the mutex. */
                        mutex_lock(&vlan_ioctl_mutex);
                        if (vlan_ioctl_hook)
                                err = vlan_ioctl_hook(net, argp);
                        mutex_unlock(&vlan_ioctl_mutex);
                        break;
                case SIOCGSKNS:
                        /* Requires CAP_NET_ADMIN in the netns' user namespace. */
                        err = -EPERM;
                        if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                                break;

                        err = open_related_ns(&net->ns, get_net_ns);
                        break;
                case SIOCGSTAMP_OLD:
                case SIOCGSTAMPNS_OLD:
                        if (!sock->ops->gettstamp) {
                                err = -ENOIOCTLCMD;
                                break;
                        }
                        /*
                         * Third argument is true for the SIOCGSTAMP_OLD
                         * command; the fourth is true on non-64-bit builds.
                         */
                        err = sock->ops->gettstamp(sock, argp,
                                                   cmd == SIOCGSTAMP_OLD,
                                                   !IS_ENABLED(CONFIG_64BIT));
                        break;
                case SIOCGSTAMP_NEW:
                case SIOCGSTAMPNS_NEW:
                        if (!sock->ops->gettstamp) {
                                err = -ENOIOCTLCMD;
                                break;
                        }
                        /* Same as above, but the fourth argument is always false. */
                        err = sock->ops->gettstamp(sock, argp,
                                                   cmd == SIOCGSTAMP_NEW,
                                                   false);
                        break;

                case SIOCGIFCONF:
                        err = dev_ifconf(net, argp);
                        break;

                default:
                        err = sock_do_ioctl(net, sock, cmd, arg);
                        break;
                }
        return err;
}
1240
1241/**
1242 *      sock_create_lite - creates a socket
1243 *      @family: protocol family (AF_INET, ...)
1244 *      @type: communication type (SOCK_STREAM, ...)
1245 *      @protocol: protocol (0, ...)
1246 *      @res: new socket
1247 *
1248 *      Creates a new socket and assigns it to @res, passing through LSM.
1249 *      The new socket initialization is not complete, see kernel_accept().
1250 *      Returns 0 or an error. On failure @res is set to %NULL.
1251 *      This function internally uses GFP_KERNEL.
1252 */
1253
1254int sock_create_lite(int family, int type, int protocol, struct socket **res)
1255{
1256        int err;
1257        struct socket *sock = NULL;
1258
1259        err = security_socket_create(family, type, protocol, 1);
1260        if (err)
1261                goto out;
1262
1263        sock = sock_alloc();
1264        if (!sock) {
1265                err = -ENOMEM;
1266                goto out;
1267        }
1268
1269        sock->type = type;
1270        err = security_socket_post_create(sock, family, type, protocol, 1);
1271        if (err)
1272                goto out_release;
1273
1274out:
1275        *res = sock;
1276        return err;
1277out_release:
1278        sock_release(sock);
1279        sock = NULL;
1280        goto out;
1281}
1282EXPORT_SYMBOL(sock_create_lite);
1283
1284/* No kernel lock held - perfect */
1285static __poll_t sock_poll(struct file *file, poll_table *wait)
1286{
1287        struct socket *sock = file->private_data;
1288        __poll_t events = poll_requested_events(wait), flag = 0;
1289
1290        if (!sock->ops->poll)
1291                return 0;
1292
1293        if (sk_can_busy_loop(sock->sk)) {
1294                /* poll once if requested by the syscall */
1295                if (events & POLL_BUSY_LOOP)
1296                        sk_busy_loop(sock->sk, 1);
1297
1298                /* if this socket can poll_ll, tell the system call */
1299                flag = POLL_BUSY_LOOP;
1300        }
1301
1302        return sock->ops->poll(file, sock, wait) | flag;
1303}
1304
/*
 * mmap handler for socket files: delegates unconditionally to the
 * protocol's ->mmap() and returns its result.
 */
static int sock_mmap(struct file *file, struct vm_area_struct *vma)
{
        struct socket *sock = file->private_data;

        return sock->ops->mmap(file, sock, vma);
}
1311
/*
 * release handler for socket files: releases the socket embedded in @inode
 * through __sock_release(). Always returns 0; @filp is not used.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
        __sock_release(SOCKET_I(inode), inode);
        return 0;
}
1317
1318/*
1319 *      Update the socket async list
1320 *
1321 *      Fasync_list locking strategy.
1322 *
1323 *      1. fasync_list is modified only under process context socket lock
1324 *         i.e. under semaphore.
1325 *      2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1326 *         or under socket lock
1327 */
1328
1329static int sock_fasync(int fd, struct file *filp, int on)
1330{
1331        struct socket *sock = filp->private_data;
1332        struct sock *sk = sock->sk;
1333        struct socket_wq *wq = &sock->wq;
1334
1335        if (sk == NULL)
1336                return -EINVAL;
1337
1338        lock_sock(sk);
1339        fasync_helper(fd, filp, on, &wq->fasync_list);
1340
1341        if (!wq->fasync_list)
1342                sock_reset_flag(sk, SOCK_FASYNC);
1343        else
1344                sock_set_flag(sk, SOCK_FASYNC);
1345
1346        release_sock(sk);
1347        return 0;
1348}
1349
1350/* This function may be called only under rcu_lock */
1351
1352int sock_wake_async(struct socket_wq *wq, int how, int band)
1353{
1354        if (!wq || !wq->fasync_list)
1355                return -1;
1356
1357        switch (how) {
1358        case SOCK_WAKE_WAITD:
1359                if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1360                        break;
1361                goto call_kill;
1362        case SOCK_WAKE_SPACE:
1363                if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1364                        break;
1365                fallthrough;
1366        case SOCK_WAKE_IO:
1367call_kill:
1368                kill_fasync(&wq->fasync_list, SIGIO, band);
1369                break;
1370        case SOCK_WAKE_URG:
1371                kill_fasync(&wq->fasync_list, SIGURG, band);
1372        }
1373
1374        return 0;
1375}
1376EXPORT_SYMBOL(sock_wake_async);
1377
/**
 *      __sock_create - creates a socket
 *      @net: net namespace
 *      @family: protocol family (AF_INET, ...)
 *      @type: communication type (SOCK_STREAM, ...)
 *      @protocol: protocol (0, ...)
 *      @res: new socket
 *      @kern: boolean for kernel space sockets
 *
 *      Creates a new socket and assigns it to @res, passing through LSM.
 *      Returns 0 or an error. On failure @res is not written by this
 *      function, so callers must not rely on it being %NULL. @kern must
 *      be set to true if the socket resides in kernel space.
 *      This function internally uses GFP_KERNEL.
 */

int __sock_create(struct net *net, int family, int type, int protocol,
                         struct socket **res, int kern)
{
        int err;
        struct socket *sock;
        const struct net_proto_family *pf;

        /*
         *      Check protocol is in range
         */
        if (family < 0 || family >= NPROTO)
                return -EAFNOSUPPORT;
        if (type < 0 || type >= SOCK_MAX)
                return -EINVAL;

        /* Compatibility.

           This uglymoron is moved from INET layer to here to avoid
           deadlock in module load.
         */
        if (family == PF_INET && type == SOCK_PACKET) {
                pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
                             current->comm);
                family = PF_PACKET;
        }

        err = security_socket_create(family, type, protocol, kern);
        if (err)
                return err;

        /*
         *      Allocate the socket and allow the family to set things up. if
         *      the protocol is 0, the family is instructed to select an appropriate
         *      default.
         */
        sock = sock_alloc();
        if (!sock) {
                net_warn_ratelimited("socket: no more sockets\n");
                return -ENFILE; /* Not exactly a match, but its the
                                   closest posix thing */
        }

        sock->type = type;

#ifdef CONFIG_MODULES
        /* Attempt to load a protocol module if the find failed.
         *
         * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
         * requested real, full-featured networking support upon configuration.
         * Otherwise module support will break!
         */
        if (rcu_access_pointer(net_families[family]) == NULL)
                request_module("net-pf-%d", family);
#endif

        rcu_read_lock();
        pf = rcu_dereference(net_families[family]);
        /* Default error for both "no family" and "family module going away". */
        err = -EAFNOSUPPORT;
        if (!pf)
                goto out_release;

        /*
         * We will call the ->create function, that possibly is in a loadable
         * module, so we have to bump that loadable module refcnt first.
         */
        if (!try_module_get(pf->owner))
                goto out_release;

        /* Now protected by module ref count */
        rcu_read_unlock();

        err = pf->create(net, sock, protocol, kern);
        if (err < 0)
                goto out_module_put;

        /*
         * Now to bump the refcnt of the [loadable] module that owns this
         * socket at sock_release time we decrement its refcnt.
         */
        if (!try_module_get(sock->ops->owner))
                goto out_module_busy;

        /*
         * Now that we're done with the ->create function, the [loadable]
         * module can have its refcnt decremented
         */
        module_put(pf->owner);
        err = security_socket_post_create(sock, family, type, protocol, kern);
        if (err)
                goto out_sock_release;
        *res = sock;

        return 0;

        /*
         * Error unwinding. The labels fall through into one another:
         * out_module_busy -> out_module_put -> out_sock_release.
         */
out_module_busy:
        err = -EAFNOSUPPORT;
out_module_put:
        /* ops is cleared before the release on these two paths. */
        sock->ops = NULL;
        module_put(pf->owner);
out_sock_release:
        sock_release(sock);
        return err;

        /* Reached only while still inside the RCU read-side section. */
out_release:
        rcu_read_unlock();
        goto out_sock_release;
}
EXPORT_SYMBOL(__sock_create);
1501
1502/**
1503 *      sock_create - creates a socket
1504 *      @family: protocol family (AF_INET, ...)
1505 *      @type: communication type (SOCK_STREAM, ...)
1506 *      @protocol: protocol (0, ...)
1507 *      @res: new socket
1508 *
1509 *      A wrapper around __sock_create().
1510 *      Returns 0 or an error. This function internally uses GFP_KERNEL.
1511 */
1512
1513int sock_create(int family, int type, int protocol, struct socket **res)
1514{
1515        return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1516}
1517EXPORT_SYMBOL(sock_create);
1518
/**
 *      sock_create_kern - creates a socket (kernel space)
 *      @net: net namespace
 *      @family: protocol family (AF_INET, ...)
 *      @type: communication type (SOCK_STREAM, ...)
 *      @protocol: protocol (0, ...)
 *      @res: new socket
 *
 *      A wrapper around __sock_create() that passes kern as 1 and uses the
 *      caller-supplied @net.
 *      Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
{
        return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1536
1537int __sys_socket(int family, int type, int protocol)
1538{
1539        int retval;
1540        struct socket *sock;
1541        int flags;
1542
1543        /* Check the SOCK_* constants for consistency.  */
1544        BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1545        BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1546        BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1547        BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1548
1549        flags = type & ~SOCK_TYPE_MASK;
1550        if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1551                return -EINVAL;
1552        type &= SOCK_TYPE_MASK;
1553
1554        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1555                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1556
1557        retval = sock_create(family, type, protocol, &sock);
1558        if (retval < 0)
1559                return retval;
1560
1561        return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1562}
1563
/* socket(2) entry point: thin wrapper around __sys_socket(). */
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
        return __sys_socket(family, type, protocol);
}
1568
/*
 *      Create a pair of connected sockets.
 *
 *      Both descriptors are reserved and written to @usockvec before any
 *      socket is created; they are only installed once both files exist.
 *      Returns 0 on success or a negative error, in which case both
 *      reserved descriptors are given back.
 */

int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
{
        struct socket *sock1, *sock2;
        int fd1, fd2, err;
        struct file *newfile1, *newfile2;
        int flags;

        /* Same flag extraction and validation as in __sys_socket(). */
        flags = type & ~SOCK_TYPE_MASK;
        if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
                return -EINVAL;
        type &= SOCK_TYPE_MASK;

        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;

        /*
         * reserve descriptors and make sure we won't fail
         * to return them to userland.
         */
        fd1 = get_unused_fd_flags(flags);
        if (unlikely(fd1 < 0))
                return fd1;

        fd2 = get_unused_fd_flags(flags);
        if (unlikely(fd2 < 0)) {
                put_unused_fd(fd1);
                return fd2;
        }

        err = put_user(fd1, &usockvec[0]);
        if (err)
                goto out;

        err = put_user(fd2, &usockvec[1]);
        if (err)
                goto out;

        /*
         * Obtain the first socket and check if the underlying protocol
         * supports the socketpair call.
         */

        err = sock_create(family, type, protocol, &sock1);
        if (unlikely(err < 0))
                goto out;

        err = sock_create(family, type, protocol, &sock2);
        if (unlikely(err < 0)) {
                sock_release(sock1);
                goto out;
        }

        err = security_socket_socketpair(sock1, sock2);
        if (unlikely(err)) {
                sock_release(sock2);
                sock_release(sock1);
                goto out;
        }

        err = sock1->ops->socketpair(sock1, sock2);
        if (unlikely(err < 0)) {
                sock_release(sock2);
                sock_release(sock1);
                goto out;
        }

        newfile1 = sock_alloc_file(sock1, flags, NULL);
        if (IS_ERR(newfile1)) {
                err = PTR_ERR(newfile1);
                /*
                 * Only sock2 is released here.
                 * NOTE(review): presumably sock_alloc_file() disposes of
                 * sock1 itself when it fails - confirm.
                 */
                sock_release(sock2);
                goto out;
        }

        newfile2 = sock_alloc_file(sock2, flags, NULL);
        if (IS_ERR(newfile2)) {
                err = PTR_ERR(newfile2);
                /*
                 * sock1 now belongs to newfile1, so dropping the file is the
                 * only cleanup done for it; sock2 is not released here
                 * (same assumption about sock_alloc_file() as above).
                 */
                fput(newfile1);
                goto out;
        }

        audit_fd_pair(fd1, fd2);

        /* Point of no return: publish both files to the fd table. */
        fd_install(fd1, newfile1);
        fd_install(fd2, newfile2);
        return 0;

out:
        put_unused_fd(fd2);
        put_unused_fd(fd1);
        return err;
}
1664
/* socketpair(2) entry point: thin wrapper around __sys_socketpair(). */
SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
                int __user *, usockvec)
{
        return __sys_socketpair(family, type, protocol, usockvec);
}
1670
1671/*
1672 *      Bind a name to a socket. Nothing much to do here since it's
1673 *      the protocol's responsibility to handle the local address.
1674 *
1675 *      We move the socket address to kernel space before we call
1676 *      the protocol layer (having also checked the address is ok).
1677 */
1678
1679int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1680{
1681        struct socket *sock;
1682        struct sockaddr_storage address;
1683        int err, fput_needed;
1684
1685        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1686        if (sock) {
1687                err = move_addr_to_kernel(umyaddr, addrlen, &address);
1688                if (!err) {
1689                        err = security_socket_bind(sock,
1690                                                   (struct sockaddr *)&address,
1691                                                   addrlen);
1692                        if (!err)
1693                                err = sock->ops->bind(sock,
1694                                                      (struct sockaddr *)
1695                                                      &address, addrlen);
1696                }
1697                fput_light(sock->file, fput_needed);
1698        }
1699        return err;
1700}
1701
/* bind(2) entry point: thin wrapper around __sys_bind(). */
SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
{
        return __sys_bind(fd, umyaddr, addrlen);
}
1706
1707/*
1708 *      Perform a listen. Basically, we allow the protocol to do anything
1709 *      necessary for a listen, and if that works, we mark the socket as
1710 *      ready for listening.
1711 */
1712
1713int __sys_listen(int fd, int backlog)
1714{
1715        struct socket *sock;
1716        int err, fput_needed;
1717        int somaxconn;
1718
1719        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1720        if (sock) {
1721                somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
1722                if ((unsigned int)backlog > somaxconn)
1723                        backlog = somaxconn;
1724
1725                err = security_socket_listen(sock, backlog);
1726                if (!err)
1727                        err = sock->ops->listen(sock, backlog);
1728
1729                fput_light(sock->file, fput_needed);
1730        }
1731        return err;
1732}
1733
/* listen(2) entry point: thin wrapper around __sys_listen(). */
SYSCALL_DEFINE2(listen, int, fd, int, backlog)
{
        return __sys_listen(fd, backlog);
}
1738
1739struct file *do_accept(struct file *file, unsigned file_flags,
1740                       struct sockaddr __user *upeer_sockaddr,
1741                       int __user *upeer_addrlen, int flags)
1742{
1743        struct socket *sock, *newsock;
1744        struct file *newfile;
1745        int err, len;
1746        struct sockaddr_storage address;
1747
1748        sock = sock_from_file(file);
1749        if (!sock)
1750                return ERR_PTR(-ENOTSOCK);
1751
1752        newsock = sock_alloc();
1753        if (!newsock)
1754                return ERR_PTR(-ENFILE);
1755
1756        newsock->type = sock->type;
1757        newsock->ops = sock->ops;
1758
1759        /*
1760         * We don't need try_module_get here, as the listening socket (sock)
1761         * has the protocol module (sock->ops->owner) held.
1762         */
1763        __module_get(newsock->ops->owner);
1764
1765        newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
1766        if (IS_ERR(newfile))
1767                return newfile;
1768
1769        err = security_socket_accept(sock, newsock);
1770        if (err)
1771                goto out_fd;
1772
1773        err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1774                                        false);
1775        if (err < 0)
1776                goto out_fd;
1777
1778        if (upeer_sockaddr) {
1779                len = newsock->ops->getname(newsock,
1780                                        (struct sockaddr *)&address, 2);
1781                if (len < 0) {
1782                        err = -ECONNABORTED;
1783                        goto out_fd;
1784                }
1785                err = move_addr_to_user(&address,
1786                                        len, upeer_sockaddr, upeer_addrlen);
1787                if (err < 0)
1788                        goto out_fd;
1789        }
1790
1791        /* File flags are not inherited via accept() unlike another OSes. */
1792        return newfile;
1793out_fd:
1794        fput(newfile);
1795        return ERR_PTR(err);
1796}
1797
1798int __sys_accept4_file(struct file *file, unsigned file_flags,
1799                       struct sockaddr __user *upeer_sockaddr,
1800                       int __user *upeer_addrlen, int flags,
1801                       unsigned long nofile)
1802{
1803        struct file *newfile;
1804        int newfd;
1805
1806        if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1807                return -EINVAL;
1808
1809        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1810                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1811
1812        newfd = __get_unused_fd_flags(flags, nofile);
1813        if (unlikely(newfd < 0))
1814                return newfd;
1815
1816        newfile = do_accept(file, file_flags, upeer_sockaddr, upeer_addrlen,
1817                            flags);
1818        if (IS_ERR(newfile)) {
1819                put_unused_fd(newfd);
1820                return PTR_ERR(newfile);
1821        }
1822        fd_install(newfd, newfile);
1823        return newfd;
1824}
1825
1826/*
1827 *      For accept, we attempt to create a new socket, set up the link
1828 *      with the client, wake up the client, then return the new
1829 *      connected fd. We collect the address of the connector in kernel
1830 *      space and move it to user at the very end. This is unclean because
1831 *      we open the socket then return an error.
1832 *
1833 *      1003.1g adds the ability to recvmsg() to query connection pending
1834 *      status to recvmsg. We need to add that support in a way thats
1835 *      clean when we restructure accept also.
1836 */
1837
1838int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1839                  int __user *upeer_addrlen, int flags)
1840{
1841        int ret = -EBADF;
1842        struct fd f;
1843
1844        f = fdget(fd);
1845        if (f.file) {
1846                ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
1847                                                upeer_addrlen, flags,
1848                                                rlimit(RLIMIT_NOFILE));
1849                fdput(f);
1850        }
1851
1852        return ret;
1853}
1854
1855SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1856                int __user *, upeer_addrlen, int, flags)
1857{
1858        return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1859}
1860
1861SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1862                int __user *, upeer_addrlen)
1863{
1864        return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
1865}
1866
1867/*
1868 *      Attempt to connect to a socket with the server address.  The address
1869 *      is in user space so we verify it is OK and move it to kernel space.
1870 *
1871 *      For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1872 *      break bindings
1873 *
1874 *      NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1875 *      other SEQPACKET protocols that take time to connect() as it doesn't
1876 *      include the -EINPROGRESS status for such sockets.
1877 */
1878
1879int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
1880                       int addrlen, int file_flags)
1881{
1882        struct socket *sock;
1883        int err;
1884
1885        sock = sock_from_file(file);
1886        if (!sock) {
1887                err = -ENOTSOCK;
1888                goto out;
1889        }
1890
1891        err =
1892            security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1893        if (err)
1894                goto out;
1895
1896        err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1897                                 sock->file->f_flags | file_flags);
1898out:
1899        return err;
1900}
1901
1902int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1903{
1904        int ret = -EBADF;
1905        struct fd f;
1906
1907        f = fdget(fd);
1908        if (f.file) {
1909                struct sockaddr_storage address;
1910
1911                ret = move_addr_to_kernel(uservaddr, addrlen, &address);
1912                if (!ret)
1913                        ret = __sys_connect_file(f.file, &address, addrlen, 0);
1914                fdput(f);
1915        }
1916
1917        return ret;
1918}
1919
1920SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1921                int, addrlen)
1922{
1923        return __sys_connect(fd, uservaddr, addrlen);
1924}
1925
1926/*
1927 *      Get the local address ('name') of a socket object. Move the obtained
1928 *      name to user space.
1929 */
1930
1931int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1932                      int __user *usockaddr_len)
1933{
1934        struct socket *sock;
1935        struct sockaddr_storage address;
1936        int err, fput_needed;
1937
1938        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1939        if (!sock)
1940                goto out;
1941
1942        err = security_socket_getsockname(sock);
1943        if (err)
1944                goto out_put;
1945
1946        err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1947        if (err < 0)
1948                goto out_put;
1949        /* "err" is actually length in this case */
1950        err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1951
1952out_put:
1953        fput_light(sock->file, fput_needed);
1954out:
1955        return err;
1956}
1957
1958SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1959                int __user *, usockaddr_len)
1960{
1961        return __sys_getsockname(fd, usockaddr, usockaddr_len);
1962}
1963
1964/*
1965 *      Get the remote address ('name') of a socket object. Move the obtained
1966 *      name to user space.
1967 */
1968
1969int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1970                      int __user *usockaddr_len)
1971{
1972        struct socket *sock;
1973        struct sockaddr_storage address;
1974        int err, fput_needed;
1975
1976        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1977        if (sock != NULL) {
1978                err = security_socket_getpeername(sock);
1979                if (err) {
1980                        fput_light(sock->file, fput_needed);
1981                        return err;
1982                }
1983
1984                err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1985                if (err >= 0)
1986                        /* "err" is actually length in this case */
1987                        err = move_addr_to_user(&address, err, usockaddr,
1988                                                usockaddr_len);
1989                fput_light(sock->file, fput_needed);
1990        }
1991        return err;
1992}
1993
1994SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1995                int __user *, usockaddr_len)
1996{
1997        return __sys_getpeername(fd, usockaddr, usockaddr_len);
1998}
1999
2000/*
2001 *      Send a datagram to a given address. We move the address into kernel
2002 *      space and check the user space data area is readable before invoking
2003 *      the protocol.
2004 */
2005int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
2006                 struct sockaddr __user *addr,  int addr_len)
2007{
2008        struct socket *sock;
2009        struct sockaddr_storage address;
2010        int err;
2011        struct msghdr msg;
2012        struct iovec iov;
2013        int fput_needed;
2014
2015        err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
2016        if (unlikely(err))
2017                return err;
2018        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2019        if (!sock)
2020                goto out;
2021
2022        msg.msg_name = NULL;
2023        msg.msg_control = NULL;
2024        msg.msg_controllen = 0;
2025        msg.msg_namelen = 0;
2026        if (addr) {
2027                err = move_addr_to_kernel(addr, addr_len, &address);
2028                if (err < 0)
2029                        goto out_put;
2030                msg.msg_name = (struct sockaddr *)&address;
2031                msg.msg_namelen = addr_len;
2032        }
2033        if (sock->file->f_flags & O_NONBLOCK)
2034                flags |= MSG_DONTWAIT;
2035        msg.msg_flags = flags;
2036        err = sock_sendmsg(sock, &msg);
2037
2038out_put:
2039        fput_light(sock->file, fput_needed);
2040out:
2041        return err;
2042}
2043
2044SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2045                unsigned int, flags, struct sockaddr __user *, addr,
2046                int, addr_len)
2047{
2048        return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2049}
2050
2051/*
2052 *      Send a datagram down a socket.
2053 */
2054
2055SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
2056                unsigned int, flags)
2057{
2058        return __sys_sendto(fd, buff, len, flags, NULL, 0);
2059}
2060
2061/*
2062 *      Receive a frame from the socket and optionally record the address of the
2063 *      sender. We verify the buffers are writable and if needed move the
2064 *      sender address from kernel to user space.
2065 */
2066int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2067                   struct sockaddr __user *addr, int __user *addr_len)
2068{
2069        struct socket *sock;
2070        struct iovec iov;
2071        struct msghdr msg;
2072        struct sockaddr_storage address;
2073        int err, err2;
2074        int fput_needed;
2075
2076        err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2077        if (unlikely(err))
2078                return err;
2079        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2080        if (!sock)
2081                goto out;
2082
2083        msg.msg_control = NULL;
2084        msg.msg_controllen = 0;
2085        /* Save some cycles and don't copy the address if not needed */
2086        msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2087        /* We assume all kernel code knows the size of sockaddr_storage */
2088        msg.msg_namelen = 0;
2089        msg.msg_iocb = NULL;
2090        msg.msg_flags = 0;
2091        if (sock->file->f_flags & O_NONBLOCK)
2092                flags |= MSG_DONTWAIT;
2093        err = sock_recvmsg(sock, &msg, flags);
2094
2095        if (err >= 0 && addr != NULL) {
2096                err2 = move_addr_to_user(&address,
2097                                         msg.msg_namelen, addr, addr_len);
2098                if (err2 < 0)
2099                        err = err2;
2100        }
2101
2102        fput_light(sock->file, fput_needed);
2103out:
2104        return err;
2105}
2106
2107SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2108                unsigned int, flags, struct sockaddr __user *, addr,
2109                int __user *, addr_len)
2110{
2111        return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2112}
2113
2114/*
2115 *      Receive a datagram from a socket.
2116 */
2117
2118SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2119                unsigned int, flags)
2120{
2121        return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
2122}
2123
2124static bool sock_use_custom_sol_socket(const struct socket *sock)
2125{
2126        const struct sock *sk = sock->sk;
2127
2128        /* Use sock->ops->setsockopt() for MPTCP */
2129        return IS_ENABLED(CONFIG_MPTCP) &&
2130               sk->sk_protocol == IPPROTO_MPTCP &&
2131               sk->sk_type == SOCK_STREAM &&
2132               (sk->sk_family == AF_INET || sk->sk_family == AF_INET6);
2133}
2134
2135/*
2136 *      Set a socket option. Because we don't know the option lengths we have
2137 *      to pass the user mode parameter for the protocols to sort out.
2138 */
2139int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
2140                int optlen)
2141{
2142        sockptr_t optval = USER_SOCKPTR(user_optval);
2143        char *kernel_optval = NULL;
2144        int err, fput_needed;
2145        struct socket *sock;
2146
2147        if (optlen < 0)
2148                return -EINVAL;
2149
2150        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2151        if (!sock)
2152                return err;
2153
2154        err = security_socket_setsockopt(sock, level, optname);
2155        if (err)
2156                goto out_put;
2157
2158        if (!in_compat_syscall())
2159                err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
2160                                                     user_optval, &optlen,
2161                                                     &kernel_optval);
2162        if (err < 0)
2163                goto out_put;
2164        if (err > 0) {
2165                err = 0;
2166                goto out_put;
2167        }
2168
2169        if (kernel_optval)
2170                optval = KERNEL_SOCKPTR(kernel_optval);
2171        if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
2172                err = sock_setsockopt(sock, level, optname, optval, optlen);
2173        else if (unlikely(!sock->ops->setsockopt))
2174                err = -EOPNOTSUPP;
2175        else
2176                err = sock->ops->setsockopt(sock, level, optname, optval,
2177                                            optlen);
2178        kfree(kernel_optval);
2179out_put:
2180        fput_light(sock->file, fput_needed);
2181        return err;
2182}
2183
2184SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2185                char __user *, optval, int, optlen)
2186{
2187        return __sys_setsockopt(fd, level, optname, optval, optlen);
2188}
2189
2190INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2191                                                         int optname));
2192
2193/*
2194 *      Get a socket option. Because we don't know the option lengths we have
2195 *      to pass a user mode parameter for the protocols to sort out.
2196 */
2197int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2198                int __user *optlen)
2199{
2200        int err, fput_needed;
2201        struct socket *sock;
2202        int max_optlen;
2203
2204        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2205        if (!sock)
2206                return err;
2207
2208        err = security_socket_getsockopt(sock, level, optname);
2209        if (err)
2210                goto out_put;
2211
2212        if (!in_compat_syscall())
2213                max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
2214
2215        if (level == SOL_SOCKET)
2216                err = sock_getsockopt(sock, level, optname, optval, optlen);
2217        else if (unlikely(!sock->ops->getsockopt))
2218                err = -EOPNOTSUPP;
2219        else
2220                err = sock->ops->getsockopt(sock, level, optname, optval,
2221                                            optlen);
2222
2223        if (!in_compat_syscall())
2224                err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2225                                                     optval, optlen, max_optlen,
2226                                                     err);
2227out_put:
2228        fput_light(sock->file, fput_needed);
2229        return err;
2230}
2231
2232SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2233                char __user *, optval, int __user *, optlen)
2234{
2235        return __sys_getsockopt(fd, level, optname, optval, optlen);
2236}
2237
2238/*
2239 *      Shutdown a socket.
2240 */
2241
2242int __sys_shutdown_sock(struct socket *sock, int how)
2243{
2244        int err;
2245
2246        err = security_socket_shutdown(sock, how);
2247        if (!err)
2248                err = sock->ops->shutdown(sock, how);
2249
2250        return err;
2251}
2252
2253int __sys_shutdown(int fd, int how)
2254{
2255        int err, fput_needed;
2256        struct socket *sock;
2257
2258        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2259        if (sock != NULL) {
2260                err = __sys_shutdown_sock(sock, how);
2261                fput_light(sock->file, fput_needed);
2262        }
2263        return err;
2264}
2265
2266SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2267{
2268        return __sys_shutdown(fd, how);
2269}
2270
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * These macros are not self-contained: at the point of use a variable
 * named "flags" must be in scope, and next to the pointer passed as
 * "msg" there must be a second pointer named "<msg>_compat". The compat
 * pointer is selected when MSG_CMSG_COMPAT is set in "flags".
 */
#define COMPAT_MSG(msg, member)                                 \
        ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member      \
                                   : &msg->member)
#define COMPAT_NAMELEN(msg)     COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)       COMPAT_MSG(msg, msg_flags)
2277
2278struct used_address {
2279        struct sockaddr_storage name;
2280        unsigned int name_len;
2281};
2282
2283int __copy_msghdr_from_user(struct msghdr *kmsg,
2284                            struct user_msghdr __user *umsg,
2285                            struct sockaddr __user **save_addr,
2286                            struct iovec __user **uiov, size_t *nsegs)
2287{
2288        struct user_msghdr msg;
2289        ssize_t err;
2290
2291        if (copy_from_user(&msg, umsg, sizeof(*umsg)))
2292                return -EFAULT;
2293
2294        kmsg->msg_control_is_user = true;
2295        kmsg->msg_control_user = msg.msg_control;
2296        kmsg->msg_controllen = msg.msg_controllen;
2297        kmsg->msg_flags = msg.msg_flags;
2298
2299        kmsg->msg_namelen = msg.msg_namelen;
2300        if (!msg.msg_name)
2301                kmsg->msg_namelen = 0;
2302
2303        if (kmsg->msg_namelen < 0)
2304                return -EINVAL;
2305
2306        if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
2307                kmsg->msg_namelen = sizeof(struct sockaddr_storage);
2308
2309        if (save_addr)
2310                *save_addr = msg.msg_name;
2311
2312        if (msg.msg_name && kmsg->msg_namelen) {
2313                if (!save_addr) {
2314                        err = move_addr_to_kernel(msg.msg_name,
2315                                                  kmsg->msg_namelen,
2316                                                  kmsg->msg_name);
2317                        if (err < 0)
2318                                return err;
2319                }
2320        } else {
2321                kmsg->msg_name = NULL;
2322                kmsg->msg_namelen = 0;
2323        }
2324
2325        if (msg.msg_iovlen > UIO_MAXIOV)
2326                return -EMSGSIZE;
2327
2328        kmsg->msg_iocb = NULL;
2329        *uiov = msg.msg_iov;
2330        *nsegs = msg.msg_iovlen;
2331        return 0;
2332}
2333
2334static int copy_msghdr_from_user(struct msghdr *kmsg,
2335                                 struct user_msghdr __user *umsg,
2336                                 struct sockaddr __user **save_addr,
2337                                 struct iovec **iov)
2338{
2339        struct user_msghdr msg;
2340        ssize_t err;
2341
2342        err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov,
2343                                        &msg.msg_iovlen);
2344        if (err)
2345                return err;
2346
2347        err = import_iovec(save_addr ? READ : WRITE,
2348                            msg.msg_iov, msg.msg_iovlen,
2349                            UIO_FASTIOV, iov, &kmsg->msg_iter);
2350        return err < 0 ? err : 0;
2351}
2352
2353static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2354                           unsigned int flags, struct used_address *used_address,
2355                           unsigned int allowed_msghdr_flags)
2356{
2357        unsigned char ctl[sizeof(struct cmsghdr) + 20]
2358                                __aligned(sizeof(__kernel_size_t));
2359        /* 20 is size of ipv6_pktinfo */
2360        unsigned char *ctl_buf = ctl;
2361        int ctl_len;
2362        ssize_t err;
2363
2364        err = -ENOBUFS;
2365
2366        if (msg_sys->msg_controllen > INT_MAX)
2367                goto out;
2368        flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
2369        ctl_len = msg_sys->msg_controllen;
2370        if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
2371                err =
2372                    cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
2373                                                     sizeof(ctl));
2374                if (err)
2375                        goto out;
2376                ctl_buf = msg_sys->msg_control;
2377                ctl_len = msg_sys->msg_controllen;
2378        } else if (ctl_len) {
2379                BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2380                             CMSG_ALIGN(sizeof(struct cmsghdr)));
2381                if (ctl_len > sizeof(ctl)) {
2382                        ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
2383                        if (ctl_buf == NULL)
2384                                goto out;
2385                }
2386                err = -EFAULT;
2387                if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
2388                        goto out_freectl;
2389                msg_sys->msg_control = ctl_buf;
2390                msg_sys->msg_control_is_user = false;
2391        }
2392        msg_sys->msg_flags = flags;
2393
2394        if (sock->file->f_flags & O_NONBLOCK)
2395                msg_sys->msg_flags |= MSG_DONTWAIT;
2396        /*
2397         * If this is sendmmsg() and current destination address is same as
2398         * previously succeeded address, omit asking LSM's decision.
2399         * used_address->name_len is initialized to UINT_MAX so that the first
2400         * destination address never matches.
2401         */
2402        if (used_address && msg_sys->msg_name &&
2403            used_address->name_len == msg_sys->msg_namelen &&
2404            !memcmp(&used_address->name, msg_sys->msg_name,
2405                    used_address->name_len)) {
2406                err = sock_sendmsg_nosec(sock, msg_sys);
2407                goto out_freectl;
2408        }
2409        err = sock_sendmsg(sock, msg_sys);
2410        /*
2411         * If this is sendmmsg() and sending to current destination address was
2412         * successful, remember it.
2413         */
2414        if (used_address && err >= 0) {
2415                used_address->name_len = msg_sys->msg_namelen;
2416                if (msg_sys->msg_name)
2417                        memcpy(&used_address->name, msg_sys->msg_name,
2418                               used_address->name_len);
2419        }
2420
2421out_freectl:
2422        if (ctl_buf != ctl)
2423                sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2424out:
2425        return err;
2426}
2427
2428int sendmsg_copy_msghdr(struct msghdr *msg,
2429                        struct user_msghdr __user *umsg, unsigned flags,
2430                        struct iovec **iov)
2431{
2432        int err;
2433
2434        if (flags & MSG_CMSG_COMPAT) {
2435                struct compat_msghdr __user *msg_compat;
2436
2437                msg_compat = (struct compat_msghdr __user *) umsg;
2438                err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2439        } else {
2440                err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2441        }
2442        if (err < 0)
2443                return err;
2444
2445        return 0;
2446}
2447
2448static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2449                         struct msghdr *msg_sys, unsigned int flags,
2450                         struct used_address *used_address,
2451                         unsigned int allowed_msghdr_flags)
2452{
2453        struct sockaddr_storage address;
2454        struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2455        ssize_t err;
2456
2457        msg_sys->msg_name = &address;
2458
2459        err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2460        if (err < 0)
2461                return err;
2462
2463        err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2464                                allowed_msghdr_flags);
2465        kfree(iov);
2466        return err;
2467}
2468
2469/*
2470 *      BSD sendmsg interface
2471 */
2472long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
2473                        unsigned int flags)
2474{
2475        return ____sys_sendmsg(sock, msg, flags, NULL, 0);
2476}
2477
2478long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2479                   bool forbid_cmsg_compat)
2480{
2481        int fput_needed, err;
2482        struct msghdr msg_sys;
2483        struct socket *sock;
2484
2485        if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2486                return -EINVAL;
2487
2488        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2489        if (!sock)
2490                goto out;
2491
2492        err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
2493
2494        fput_light(sock->file, fput_needed);
2495out:
2496        return err;
2497}
2498
2499SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
2500{
2501        return __sys_sendmsg(fd, msg, flags, true);
2502}
2503
2504/*
2505 *      Linux sendmmsg interface
2506 */
2507
2508int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2509                   unsigned int flags, bool forbid_cmsg_compat)
2510{
2511        int fput_needed, err, datagrams;
2512        struct socket *sock;
2513        struct mmsghdr __user *entry;
2514        struct compat_mmsghdr __user *compat_entry;
2515        struct msghdr msg_sys;
2516        struct used_address used_address;
2517        unsigned int oflags = flags;
2518
2519        if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2520                return -EINVAL;
2521
2522        if (vlen > UIO_MAXIOV)
2523                vlen = UIO_MAXIOV;
2524
2525        datagrams = 0;
2526
2527        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2528        if (!sock)
2529                return err;
2530
2531        used_address.name_len = UINT_MAX;
2532        entry = mmsg;
2533        compat_entry = (struct compat_mmsghdr __user *)mmsg;
2534        err = 0;
2535        flags |= MSG_BATCH;
2536
2537        while (datagrams < vlen) {
2538                if (datagrams == vlen - 1)
2539                        flags = oflags;
2540
2541                if (MSG_CMSG_COMPAT & flags) {
2542                        err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
2543                                             &msg_sys, flags, &used_address, MSG_EOR);
2544                        if (err < 0)
2545                                break;
2546                        err = __put_user(err, &compat_entry->msg_len);
2547                        ++compat_entry;
2548                } else {
2549                        err = ___sys_sendmsg(sock,
2550                                             (struct user_msghdr __user *)entry,
2551                                             &msg_sys, flags, &used_address, MSG_EOR);
2552                        if (err < 0)
2553                                break;
2554                        err = put_user(err, &entry->msg_len);
2555                        ++entry;
2556                }
2557
2558                if (err)
2559                        break;
2560                ++datagrams;
2561                if (msg_data_left(&msg_sys))
2562                        break;
2563                cond_resched();
2564        }
2565
2566        fput_light(sock->file, fput_needed);
2567
2568        /* We only return an error if no datagrams were able to be sent */
2569        if (datagrams != 0)
2570                return datagrams;
2571
2572        return err;
2573}
2574
2575SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2576                unsigned int, vlen, unsigned int, flags)
2577{
2578        return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
2579}
2580
2581int recvmsg_copy_msghdr(struct msghdr *msg,
2582                        struct user_msghdr __user *umsg, unsigned flags,
2583                        struct sockaddr __user **uaddr,
2584                        struct iovec **iov)
2585{
2586        ssize_t err;
2587
2588        if (MSG_CMSG_COMPAT & flags) {
2589                struct compat_msghdr __user *msg_compat;
2590
2591                msg_compat = (struct compat_msghdr __user *) umsg;
2592                err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2593        } else {
2594                err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2595        }
2596        if (err < 0)
2597                return err;
2598
2599        return 0;
2600}
2601
2602static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2603                           struct user_msghdr __user *msg,
2604                           struct sockaddr __user *uaddr,
2605                           unsigned int flags, int nosec)
2606{
2607        struct compat_msghdr __user *msg_compat =
2608                                        (struct compat_msghdr __user *) msg;
2609        int __user *uaddr_len = COMPAT_NAMELEN(msg);
2610        struct sockaddr_storage addr;
2611        unsigned long cmsg_ptr;
2612        int len;
2613        ssize_t err;
2614
2615        msg_sys->msg_name = &addr;
2616        cmsg_ptr = (unsigned long)msg_sys->msg_control;
2617        msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
2618
2619        /* We assume all kernel code knows the size of sockaddr_storage */
2620        msg_sys->msg_namelen = 0;
2621
2622        if (sock->file->f_flags & O_NONBLOCK)
2623                flags |= MSG_DONTWAIT;
2624
2625        if (unlikely(nosec))
2626                err = sock_recvmsg_nosec(sock, msg_sys, flags);
2627        else
2628                err = sock_recvmsg(sock, msg_sys, flags);
2629
2630        if (err < 0)
2631                goto out;
2632        len = err;
2633
2634        if (uaddr != NULL) {
2635                err = move_addr_to_user(&addr,
2636                                        msg_sys->msg_namelen, uaddr,
2637                                        uaddr_len);
2638                if (err < 0)
2639                        goto out;
2640        }
2641        err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
2642                         COMPAT_FLAGS(msg));
2643        if (err)
2644                goto out;
2645        if (MSG_CMSG_COMPAT & flags)
2646                err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
2647                                 &msg_compat->msg_controllen);
2648        else
2649                err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
2650                                 &msg->msg_controllen);
2651        if (err)
2652                goto out;
2653        err = len;
2654out:
2655        return err;
2656}
2657
2658static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2659                         struct msghdr *msg_sys, unsigned int flags, int nosec)
2660{
2661        struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2662        /* user mode address pointers */
2663        struct sockaddr __user *uaddr;
2664        ssize_t err;
2665
2666        err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2667        if (err < 0)
2668                return err;
2669
2670        err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
2671        kfree(iov);
2672        return err;
2673}
2674
2675/*
2676 *      BSD recvmsg interface
2677 */
2678
2679long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2680                        struct user_msghdr __user *umsg,
2681                        struct sockaddr __user *uaddr, unsigned int flags)
2682{
2683        return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
2684}
2685
2686long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2687                   bool forbid_cmsg_compat)
2688{
2689        int fput_needed, err;
2690        struct msghdr msg_sys;
2691        struct socket *sock;
2692
2693        if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2694                return -EINVAL;
2695
2696        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2697        if (!sock)
2698                goto out;
2699
2700        err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2701
2702        fput_light(sock->file, fput_needed);
2703out:
2704        return err;
2705}
2706
2707SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
2708                unsigned int, flags)
2709{
2710        return __sys_recvmsg(fd, msg, flags, true);
2711}
2712
2713/*
2714 *     Linux recvmmsg interface
2715 */
2716
/*
 * Receive up to @vlen messages on the socket behind @fd into the @mmsg
 * array, storing the result of each ___sys_recvmsg() call in the entry's
 * msg_len field.
 *
 * @flags:   recvmsg flags. MSG_CMSG_COMPAT makes @mmsg be walked as an array
 *           of struct compat_mmsghdr; MSG_WAITFORONE is stripped from the
 *           per-message call and adds MSG_DONTWAIT once one message has
 *           been received.
 * @timeout: optional relative timeout in kernel memory. When non-NULL it is
 *           overwritten after every message with the time remaining (or
 *           zero once the deadline has passed).
 *
 * Returns the number of messages received. If an error occurs before any
 * message has been received, the negative errno is returned instead. An
 * error other than -EAGAIN that occurs after at least one message is stored
 * in sk->sk_err and the count is returned.
 */
static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
                          unsigned int vlen, unsigned int flags,
                          struct timespec64 *timeout)
{
        int fput_needed, err, datagrams;
        struct socket *sock;
        struct mmsghdr __user *entry;
        struct compat_mmsghdr __user *compat_entry;
        struct msghdr msg_sys;
        struct timespec64 end_time;
        struct timespec64 timeout64;

        /*
         * end_time is filled from the relative timeout here and later
         * compared against the current time, i.e. it is used as the deadline.
         */
        if (timeout &&
            poll_select_set_timeout(&end_time, timeout->tv_sec,
                                    timeout->tv_nsec))
                return -EINVAL;

        datagrams = 0;

        sock = sockfd_lookup_light(fd, &err, &fput_needed);
        if (!sock)
                return err;

        /*
         * Unless the caller is reading the error queue, a pending socket
         * error is returned right away instead of receiving anything.
         */
        if (likely(!(flags & MSG_ERRQUEUE))) {
                err = sock_error(sock->sk);
                if (err) {
                        datagrams = err;
                        goto out_put;
                }
        }

        /* Two cursors over the same user array, one per header layout. */
        entry = mmsg;
        compat_entry = (struct compat_mmsghdr __user *)mmsg;

        while (datagrams < vlen) {
                /*
                 * No need to ask LSM for more than the first datagram.
                 * (datagrams is passed as the nosec argument: it is 0 for
                 * the first message only.)
                 */
                if (MSG_CMSG_COMPAT & flags) {
                        err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
                                             &msg_sys, flags & ~MSG_WAITFORONE,
                                             datagrams);
                        if (err < 0)
                                break;
                        /* err holds the receive result; publish it as msg_len. */
                        err = __put_user(err, &compat_entry->msg_len);
                        ++compat_entry;
                } else {
                        err = ___sys_recvmsg(sock,
                                             (struct user_msghdr __user *)entry,
                                             &msg_sys, flags & ~MSG_WAITFORONE,
                                             datagrams);
                        if (err < 0)
                                break;
                        /* err holds the receive result; publish it as msg_len. */
                        err = put_user(err, &entry->msg_len);
                        ++entry;
                }

                /* Failed to store msg_len: this message is not counted. */
                if (err)
                        break;
                ++datagrams;

                /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
                if (flags & MSG_WAITFORONE)
                        flags |= MSG_DONTWAIT;

                if (timeout) {
                        /* Recompute the remaining time against the deadline. */
                        ktime_get_ts64(&timeout64);
                        *timeout = timespec64_sub(end_time, timeout64);
                        if (timeout->tv_sec < 0) {
                                /* Deadline already passed: report zero left. */
                                timeout->tv_sec = timeout->tv_nsec = 0;
                                break;
                        }

                        /* Timeout, return less than vlen datagrams */
                        if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
                                break;
                }

                /* Out of band data, return right away */
                if (msg_sys.msg_flags & MSG_OOB)
                        break;
                cond_resched();
        }

        /* The loop ended without an error: just return the count. */
        if (err == 0)
                goto out_put;

        /* Nothing was received, so the error itself is the return value. */
        if (datagrams == 0) {
                datagrams = err;
                goto out_put;
        }

        /*
         * We may return less entries than requested (vlen) if the
         * sock is non block and there aren't enough datagrams...
         */
        if (err != -EAGAIN) {
                /*
                 * ... or  if recvmsg returns an error after we
                 * received some datagrams, where we record the
                 * error to return on the next call or if the
                 * app asks about it using getsockopt(SO_ERROR).
                 */
                sock->sk->sk_err = -err;
        }
out_put:
        fput_light(sock->file, fput_needed);

        return datagrams;
}
2827
2828int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2829                   unsigned int vlen, unsigned int flags,
2830                   struct __kernel_timespec __user *timeout,
2831                   struct old_timespec32 __user *timeout32)
2832{
2833        int datagrams;
2834        struct timespec64 timeout_sys;
2835
2836        if (timeout && get_timespec64(&timeout_sys, timeout))
2837                return -EFAULT;
2838
2839        if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
2840                return -EFAULT;
2841
2842        if (!timeout && !timeout32)
2843                return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2844
2845        datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2846
2847        if (datagrams <= 0)
2848                return datagrams;
2849
2850        if (timeout && put_timespec64(&timeout_sys, timeout))
2851                datagrams = -EFAULT;
2852
2853        if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
2854                datagrams = -EFAULT;
2855
2856        return datagrams;
2857}
2858
2859SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2860                unsigned int, vlen, unsigned int, flags,
2861                struct __kernel_timespec __user *, timeout)
2862{
2863        if (flags & MSG_CMSG_COMPAT)
2864                return -EINVAL;
2865
2866        return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2867}
2868
2869#ifdef CONFIG_COMPAT_32BIT_TIME
2870SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
2871                unsigned int, vlen, unsigned int, flags,
2872                struct old_timespec32 __user *, timeout)
2873{
2874        if (flags & MSG_CMSG_COMPAT)
2875                return -EINVAL;
2876
2877        return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
2878}
2879#endif
2880
2881#ifdef __ARCH_WANT_SYS_SOCKETCALL
/*
 * Argument list sizes for sys_socketcall: nargs[i] is the size in bytes of
 * the argument block for table index i, expressed as a count of unsigned
 * longs via AL().
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
        [0]  = AL(0),
        [1]  = AL(3),
        [2]  = AL(3),
        [3]  = AL(3),
        [4]  = AL(2),
        [5]  = AL(3),
        [6]  = AL(3),
        [7]  = AL(3),
        [8]  = AL(4),
        [9]  = AL(4),
        [10] = AL(4),
        [11] = AL(6),
        [12] = AL(6),
        [13] = AL(2),
        [14] = AL(5),
        [15] = AL(5),
        [16] = AL(3),
        [17] = AL(3),
        [18] = AL(4),
        [19] = AL(5),
        [20] = AL(4),
};

#undef AL
2892
/*
 *      System call vectors.
 *
 *      Argument checking cleaned up. Saved 20% in size.
 *  This function doesn't need to set the kernel lock because
 *  it is set by the callees.
 *
 *  socketcall multiplexes the socket system calls: @call selects the
 *  operation and @args points to a user array of unsigned longs holding
 *  its arguments. nargs[call] bytes are copied from @args, passed to
 *  audit_socketcall(), and the matching __sys_*() helper is invoked.
 *
 *  Returns the helper's result, -EINVAL for a @call outside
 *  1..SYS_SENDMMSG (or one whose argument block would not fit in the
 *  local array), -EFAULT if @args cannot be read, or the non-zero result
 *  of audit_socketcall().
 */

SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
{
        unsigned long a[AUDITSC_ARGS];
        unsigned long a0, a1;
        int err;
        unsigned int len;

        if (call < 1 || call > SYS_SENDMMSG)
                return -EINVAL;
        /* Clamp the index under speculation before it is used on nargs[]. */
        call = array_index_nospec(call, SYS_SENDMMSG + 1);

        /* len is a byte count; never copy more than the local array holds. */
        len = nargs[call];
        if (len > sizeof(a))
                return -EINVAL;

        /* copy_from_user should be SMP safe. */
        if (copy_from_user(a, args, len))
                return -EFAULT;

        /* The audit hook takes the argument count, not the byte count. */
        err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
        if (err)
                return err;

        a0 = a[0];
        a1 = a[1];

        switch (call) {
        case SYS_SOCKET:
                err = __sys_socket(a0, a1, a[2]);
                break;
        case SYS_BIND:
                err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
                break;
        case SYS_CONNECT:
                err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
                break;
        case SYS_LISTEN:
                err = __sys_listen(a0, a1);
                break;
        case SYS_ACCEPT:
                /* Plain accept is accept4 with a flags argument of 0. */
                err = __sys_accept4(a0, (struct sockaddr __user *)a1,
                                    (int __user *)a[2], 0);
                break;
        case SYS_GETSOCKNAME:
                err =
                    __sys_getsockname(a0, (struct sockaddr __user *)a1,
                                      (int __user *)a[2]);
                break;
        case SYS_GETPEERNAME:
                err =
                    __sys_getpeername(a0, (struct sockaddr __user *)a1,
                                      (int __user *)a[2]);
                break;
        case SYS_SOCKETPAIR:
                err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
                break;
        case SYS_SEND:
                /* send is sendto with no destination address. */
                err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
                                   NULL, 0);
                break;
        case SYS_SENDTO:
                err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
                                   (struct sockaddr __user *)a[4], a[5]);
                break;
        case SYS_RECV:
                /* recv is recvfrom with no source address buffers. */
                err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
                                     NULL, NULL);
                break;
        case SYS_RECVFROM:
                err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
                                     (struct sockaddr __user *)a[4],
                                     (int __user *)a[5]);
                break;
        case SYS_SHUTDOWN:
                err = __sys_shutdown(a0, a1);
                break;
        case SYS_SETSOCKOPT:
                err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
                                       a[4]);
                break;
        case SYS_GETSOCKOPT:
                err =
                    __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
                                     (int __user *)a[4]);
                break;
        case SYS_SENDMSG:
                err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
                                    a[2], true);
                break;
        case SYS_SENDMMSG:
                err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
                                     a[3], true);
                break;
        case SYS_RECVMSG:
                /* true == forbid_cmsg_compat: MSG_CMSG_COMPAT yields -EINVAL. */
                err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
                                    a[2], true);
                break;
        case SYS_RECVMMSG:
                /*
                 * The timeout argument is a __kernel_timespec on 64-bit
                 * builds and an old_timespec32 otherwise.
                 */
                if (IS_ENABLED(CONFIG_64BIT))
                        err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
                                             a[2], a[3],
                                             (struct __kernel_timespec __user *)a[4],
                                             NULL);
                else
                        err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
                                             a[2], a[3], NULL,
                                             (struct old_timespec32 __user *)a[4]);
                break;
        case SYS_ACCEPT4:
                err = __sys_accept4(a0, (struct sockaddr __user *)a1,
                                    (int __user *)a[2], a[3]);
                break;
        default:
                err = -EINVAL;
                break;
        }
        return err;
}
3019
3020#endif                          /* __ARCH_WANT_SYS_SOCKETCALL */
3021
3022/**
3023 *      sock_register - add a socket protocol handler
3024 *      @ops: description of protocol
3025 *
3026 *      This function is called by a protocol handler that wants to
3027 *      advertise its address family, and have it linked into the
3028 *      socket interface. The value ops->family corresponds to the
3029 *      socket system call protocol family.
3030 */
3031int sock_register(const struct net_proto_family *ops)
3032{
3033        int err;
3034
3035        if (ops->family >= NPROTO) {
3036                pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
3037                return -ENOBUFS;
3038        }
3039
3040        spin_lock(&net_family_lock);
3041        if (rcu_dereference_protected(net_families[ops->family],
3042                                      lockdep_is_held(&net_family_lock)))
3043                err = -EEXIST;
3044        else {
3045                rcu_assign_pointer(net_families[ops->family], ops);
3046                err = 0;
3047        }
3048        spin_unlock(&net_family_lock);
3049
3050        pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
3051        return err;
3052}
3053EXPORT_SYMBOL(sock_register);
3054
3055/**
3056 *      sock_unregister - remove a protocol handler
3057 *      @family: protocol family to remove
3058 *
3059 *      This function is called by a protocol handler that wants to
3060 *      remove its address family, and have it unlinked from the
3061 *      new socket creation.
3062 *
3063 *      If protocol handler is a module, then it can use module reference
3064 *      counts to protect against new references. If protocol handler is not
3065 *      a module then it needs to provide its own protection in
3066 *      the ops->create routine.
3067 */
3068void sock_unregister(int family)
3069{
3070        BUG_ON(family < 0 || family >= NPROTO);
3071
3072        spin_lock(&net_family_lock);
3073        RCU_INIT_POINTER(net_families[family], NULL);
3074        spin_unlock(&net_family_lock);
3075
3076        synchronize_rcu();
3077
3078        pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
3079}
3080EXPORT_SYMBOL(sock_unregister);
3081
3082bool sock_is_registered(int family)
3083{
3084        return family < NPROTO && rcu_access_pointer(net_families[family]);
3085}
3086
3087static int __init sock_init(void)
3088{
3089        int err;
3090        /*
3091         *      Initialize the network sysctl infrastructure.
3092         */
3093        err = net_sysctl_init();
3094        if (err)
3095                goto out;
3096
3097        /*
3098         *      Initialize skbuff SLAB cache
3099         */
3100        skb_init();
3101
3102        /*
3103         *      Initialize the protocols module.
3104         */
3105
3106        init_inodecache();
3107
3108        err = register_filesystem(&sock_fs_type);
3109        if (err)
3110                goto out;
3111        sock_mnt = kern_mount(&sock_fs_type);
3112        if (IS_ERR(sock_mnt)) {
3113                err = PTR_ERR(sock_mnt);
3114                goto out_mount;
3115        }
3116
3117        /* The real protocol initialization is performed in later initcalls.
3118         */
3119
3120#ifdef CONFIG_NETFILTER
3121        err = netfilter_init();
3122        if (err)
3123                goto out;
3124#endif
3125
3126        ptp_classifier_init();
3127
3128out:
3129        return err;
3130
3131out_mount:
3132        unregister_filesystem(&sock_fs_type);
3133        goto out;
3134}
3135
3136core_initcall(sock_init);       /* early initcall */
3137
3138#ifdef CONFIG_PROC_FS
3139void socket_seq_show(struct seq_file *seq)
3140{
3141        seq_printf(seq, "sockets: used %d\n",
3142                   sock_inuse_get(seq->private));
3143}
3144#endif                          /* CONFIG_PROC_FS */
3145
3146/* Handle the fact that while struct ifreq has the same *layout* on
3147 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3148 * which are handled elsewhere, it still has different *size* due to
3149 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3150 * resulting in struct ifreq being 32 and 40 bytes respectively).
3151 * As a result, if the struct happens to be at the end of a page and
3152 * the next page isn't readable/writable, we get a fault. To prevent
3153 * that, copy back and forth to the full size.
3154 */
3155int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
3156{
3157        if (in_compat_syscall()) {
3158                struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
3159
3160                memset(ifr, 0, sizeof(*ifr));
3161                if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
3162                        return -EFAULT;
3163
3164                if (ifrdata)
3165                        *ifrdata = compat_ptr(ifr32->ifr_data);
3166
3167                return 0;
3168        }
3169
3170        if (copy_from_user(ifr, arg, sizeof(*ifr)))
3171                return -EFAULT;
3172
3173        if (ifrdata)
3174                *ifrdata = ifr->ifr_data;
3175
3176        return 0;
3177}
3178EXPORT_SYMBOL(get_user_ifreq);
3179
3180int put_user_ifreq(struct ifreq *ifr, void __user *arg)
3181{
3182        size_t size = sizeof(*ifr);
3183
3184        if (in_compat_syscall())
3185                size = sizeof(struct compat_ifreq);
3186
3187        if (copy_to_user(arg, ifr, size))
3188                return -EFAULT;
3189
3190        return 0;
3191}
3192EXPORT_SYMBOL(put_user_ifreq);
3193
3194#ifdef CONFIG_COMPAT
3195static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3196{
3197        compat_uptr_t uptr32;
3198        struct ifreq ifr;
3199        void __user *saved;
3200        int err;
3201
3202        if (get_user_ifreq(&ifr, NULL, uifr32))
3203                return -EFAULT;
3204
3205        if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3206                return -EFAULT;
3207
3208        saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3209        ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
3210
3211        err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL);
3212        if (!err) {
3213                ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3214                if (put_user_ifreq(&ifr, uifr32))
3215                        err = -EFAULT;
3216        }
3217        return err;
3218}
3219
3220/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3221static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
3222                                 struct compat_ifreq __user *u_ifreq32)
3223{
3224        struct ifreq ifreq;
3225        void __user *data;
3226
3227        if (!is_socket_ioctl_cmd(cmd))
3228                return -ENOTTY;
3229        if (get_user_ifreq(&ifreq, &data, u_ifreq32))
3230                return -EFAULT;
3231        ifreq.ifr_data = data;
3232
3233        return dev_ioctl(net, cmd, &ifreq, data, NULL);
3234}
3235
3236/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3237 * for some operations; this forces use of the newer bridge-utils that
3238 * use compatible ioctls
3239 */
3240static int old_bridge_ioctl(compat_ulong_t __user *argp)
3241{
3242        compat_ulong_t tmp;
3243
3244        if (get_user(tmp, argp))
3245                return -EFAULT;
3246        if (tmp == BRCTL_GET_VERSION)
3247                return BRCTL_VERSION + 1;
3248        return -EINVAL;
3249}
3250
/*
 * Route a socket ioctl issued through the compat path to the handler that
 * fits its argument format.
 *
 * @arg is the raw compat argument; argp is the same value converted to a
 * user pointer with compat_ptr(). Commands not listed below return
 * -ENOIOCTLCMD.
 */
static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
                         unsigned int cmd, unsigned long arg)
{
        void __user *argp = compat_ptr(arg);
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);

        /* Device-private range: hand the converted pointer to sock_ioctl(). */
        if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
                return sock_ioctl(file, cmd, (unsigned long)argp);

        switch (cmd) {
        case SIOCSIFBR:
        case SIOCGIFBR:
                return old_bridge_ioctl(argp);
        case SIOCWANDEV:
                return compat_siocwandev(net, argp);
        case SIOCGSTAMP_OLD:
        case SIOCGSTAMPNS_OLD:
                /* Timestamp queries go straight to the protocol, if it has one. */
                if (!sock->ops->gettstamp)
                        return -ENOIOCTLCMD;
                return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
                                            !COMPAT_USE_64BIT_TIME);

        /* Commands whose struct ifreq carries a pointer in ifr_data. */
        case SIOCETHTOOL:
        case SIOCBONDSLAVEINFOQUERY:
        case SIOCBONDINFOQUERY:
        case SIOCSHWTSTAMP:
        case SIOCGHWTSTAMP:
                return compat_ifr_data_ioctl(net, cmd, argp);

        /*
         * Forwarded to sock_ioctl() with the argument untouched.
         * NOTE(review): presumably these need no compat translation here;
         * confirm against sock_ioctl().
         */
        case FIOSETOWN:
        case SIOCSPGRP:
        case FIOGETOWN:
        case SIOCGPGRP:
        case SIOCBRADDBR:
        case SIOCBRDELBR:
        case SIOCGIFVLAN:
        case SIOCSIFVLAN:
        case SIOCGSKNS:
        case SIOCGSTAMP_NEW:
        case SIOCGSTAMPNS_NEW:
        case SIOCGIFCONF:
                return sock_ioctl(file, cmd, arg);

        /* Forwarded to sock_do_ioctl() with the argument untouched. */
        case SIOCGIFFLAGS:
        case SIOCSIFFLAGS:
        case SIOCGIFMAP:
        case SIOCSIFMAP:
        case SIOCGIFMETRIC:
        case SIOCSIFMETRIC:
        case SIOCGIFMTU:
        case SIOCSIFMTU:
        case SIOCGIFMEM:
        case SIOCSIFMEM:
        case SIOCGIFHWADDR:
        case SIOCSIFHWADDR:
        case SIOCADDMULTI:
        case SIOCDELMULTI:
        case SIOCGIFINDEX:
        case SIOCGIFADDR:
        case SIOCSIFADDR:
        case SIOCSIFHWBROADCAST:
        case SIOCDIFADDR:
        case SIOCGIFBRDADDR:
        case SIOCSIFBRDADDR:
        case SIOCGIFDSTADDR:
        case SIOCSIFDSTADDR:
        case SIOCGIFNETMASK:
        case SIOCSIFNETMASK:
        case SIOCSIFPFLAGS:
        case SIOCGIFPFLAGS:
        case SIOCGIFTXQLEN:
        case SIOCSIFTXQLEN:
        case SIOCBRADDIF:
        case SIOCBRDELIF:
        case SIOCGIFNAME:
        case SIOCSIFNAME:
        case SIOCGMIIPHY:
        case SIOCGMIIREG:
        case SIOCSMIIREG:
        case SIOCBONDENSLAVE:
        case SIOCBONDRELEASE:
        case SIOCBONDSETHWADDR:
        case SIOCBONDCHANGEACTIVE:
        case SIOCSARP:
        case SIOCGARP:
        case SIOCDARP:
        case SIOCOUTQ:
        case SIOCOUTQNSD:
        case SIOCATMARK:
                return sock_do_ioctl(net, sock, cmd, arg);
        }

        return -ENOIOCTLCMD;
}
3346
3347static long compat_sock_ioctl(struct file *file, unsigned int cmd,
3348                              unsigned long arg)
3349{
3350        struct socket *sock = file->private_data;
3351        int ret = -ENOIOCTLCMD;
3352        struct sock *sk;
3353        struct net *net;
3354
3355        sk = sock->sk;
3356        net = sock_net(sk);
3357
3358        if (sock->ops->compat_ioctl)
3359                ret = sock->ops->compat_ioctl(sock, cmd, arg);
3360
3361        if (ret == -ENOIOCTLCMD &&
3362            (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3363                ret = compat_wext_handle_ioctl(net, cmd, arg);
3364
3365        if (ret == -ENOIOCTLCMD)
3366                ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3367
3368        return ret;
3369}
3370#endif
3371
3372/**
3373 *      kernel_bind - bind an address to a socket (kernel space)
3374 *      @sock: socket
3375 *      @addr: address
3376 *      @addrlen: length of address
3377 *
3378 *      Returns 0 or an error.
3379 */
3380
3381int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3382{
3383        return sock->ops->bind(sock, addr, addrlen);
3384}
3385EXPORT_SYMBOL(kernel_bind);
3386
3387/**
3388 *      kernel_listen - move socket to listening state (kernel space)
3389 *      @sock: socket
3390 *      @backlog: pending connections queue size
3391 *
3392 *      Returns 0 or an error.
3393 */
3394
3395int kernel_listen(struct socket *sock, int backlog)
3396{
3397        return sock->ops->listen(sock, backlog);
3398}
3399EXPORT_SYMBOL(kernel_listen);
3400
3401/**
3402 *      kernel_accept - accept a connection (kernel space)
3403 *      @sock: listening socket
3404 *      @newsock: new connected socket
3405 *      @flags: flags
3406 *
3407 *      @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3408 *      If it fails, @newsock is guaranteed to be %NULL.
3409 *      Returns 0 or an error.
3410 */
3411
3412int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3413{
3414        struct sock *sk = sock->sk;
3415        int err;
3416
3417        err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3418                               newsock);
3419        if (err < 0)
3420                goto done;
3421
3422        err = sock->ops->accept(sock, *newsock, flags, true);
3423        if (err < 0) {
3424                sock_release(*newsock);
3425                *newsock = NULL;
3426                goto done;
3427        }
3428
3429        (*newsock)->ops = sock->ops;
3430        __module_get((*newsock)->ops->owner);
3431
3432done:
3433        return err;
3434}
3435EXPORT_SYMBOL(kernel_accept);
3436
3437/**
3438 *      kernel_connect - connect a socket (kernel space)
3439 *      @sock: socket
3440 *      @addr: address
3441 *      @addrlen: address length
3442 *      @flags: flags (O_NONBLOCK, ...)
3443 *
3444 *      For datagram sockets, @addr is the address to which datagrams are sent
3445 *      by default, and the only address from which datagrams are received.
3446 *      For stream sockets, attempts to connect to @addr.
3447 *      Returns 0 or an error code.
3448 */
3449
3450int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
3451                   int flags)
3452{
3453        return sock->ops->connect(sock, addr, addrlen, flags);
3454}
3455EXPORT_SYMBOL(kernel_connect);
3456
3457/**
3458 *      kernel_getsockname - get the address which the socket is bound (kernel space)
3459 *      @sock: socket
3460 *      @addr: address holder
3461 *
3462 *      Fills the @addr pointer with the address which the socket is bound.
3463 *      Returns 0 or an error code.
3464 */
3465
3466int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
3467{
3468        return sock->ops->getname(sock, addr, 0);
3469}
3470EXPORT_SYMBOL(kernel_getsockname);
3471
3472/**
3473 *      kernel_getpeername - get the address which the socket is connected (kernel space)
3474 *      @sock: socket
3475 *      @addr: address holder
3476 *
3477 *      Fills the @addr pointer with the address which the socket is connected.
3478 *      Returns 0 or an error code.
3479 */
3480
3481int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
3482{
3483        return sock->ops->getname(sock, addr, 1);
3484}
3485EXPORT_SYMBOL(kernel_getpeername);
3486
3487/**
3488 *      kernel_sendpage - send a &page through a socket (kernel space)
3489 *      @sock: socket
3490 *      @page: page
3491 *      @offset: page offset
3492 *      @size: total size in bytes
3493 *      @flags: flags (MSG_DONTWAIT, ...)
3494 *
3495 *      Returns the total amount sent in bytes or an error.
3496 */
3497
3498int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3499                    size_t size, int flags)
3500{
3501        if (sock->ops->sendpage) {
3502                /* Warn in case the improper page to zero-copy send */
3503                WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send");
3504                return sock->ops->sendpage(sock, page, offset, size, flags);
3505        }
3506        return sock_no_sendpage(sock, page, offset, size, flags);
3507}
3508EXPORT_SYMBOL(kernel_sendpage);
3509
3510/**
3511 *      kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3512 *      @sk: sock
3513 *      @page: page
3514 *      @offset: page offset
3515 *      @size: total size in bytes
3516 *      @flags: flags (MSG_DONTWAIT, ...)
3517 *
3518 *      Returns the total amount sent in bytes or an error.
3519 *      Caller must hold @sk.
3520 */
3521
3522int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3523                           size_t size, int flags)
3524{
3525        struct socket *sock = sk->sk_socket;
3526
3527        if (sock->ops->sendpage_locked)
3528                return sock->ops->sendpage_locked(sk, page, offset, size,
3529                                                  flags);
3530
3531        return sock_no_sendpage_locked(sk, page, offset, size, flags);
3532}
3533EXPORT_SYMBOL(kernel_sendpage_locked);
3534
3535/**
3536 *      kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
3537 *      @sock: socket
3538 *      @how: connection part
3539 *
3540 *      Returns 0 or an error.
3541 */
3542
3543int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3544{
3545        return sock->ops->shutdown(sock, how);
3546}
3547EXPORT_SYMBOL(kernel_sock_shutdown);
3548
3549/**
3550 *      kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3551 *      @sk: socket
3552 *
3553 *      This routine returns the IP overhead imposed by a socket i.e.
3554 *      the length of the underlying IP header, depending on whether
3555 *      this is an IPv4 or IPv6 socket and the length from IP options turned
3556 *      on at the socket. Assumes that the caller has a lock on the socket.
3557 */
3558
3559u32 kernel_sock_ip_overhead(struct sock *sk)
3560{
3561        struct inet_sock *inet;
3562        struct ip_options_rcu *opt;
3563        u32 overhead = 0;
3564#if IS_ENABLED(CONFIG_IPV6)
3565        struct ipv6_pinfo *np;
3566        struct ipv6_txoptions *optv6 = NULL;
3567#endif /* IS_ENABLED(CONFIG_IPV6) */
3568
3569        if (!sk)
3570                return overhead;
3571
3572        switch (sk->sk_family) {
3573        case AF_INET:
3574                inet = inet_sk(sk);
3575                overhead += sizeof(struct iphdr);
3576                opt = rcu_dereference_protected(inet->inet_opt,
3577                                                sock_owned_by_user(sk));
3578                if (opt)
3579                        overhead += opt->opt.optlen;
3580                return overhead;
3581#if IS_ENABLED(CONFIG_IPV6)
3582        case AF_INET6:
3583                np = inet6_sk(sk);
3584                overhead += sizeof(struct ipv6hdr);
3585                if (np)
3586                        optv6 = rcu_dereference_protected(np->opt,
3587                                                          sock_owned_by_user(sk));
3588                if (optv6)
3589                        overhead += (optv6->opt_flen + optv6->opt_nflen);
3590                return overhead;
3591#endif /* IS_ENABLED(CONFIG_IPV6) */
3592        default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3593                return overhead;
3594        }
3595}
3596EXPORT_SYMBOL(kernel_sock_ip_overhead);
3597