linux/net/socket.c
<<
>>
Prefs
   1/*
   2 * NET          An implementation of the SOCKET network access protocol.
   3 *
   4 * Version:     @(#)socket.c    1.1.93  18/02/95
   5 *
   6 * Authors:     Orest Zborowski, <obz@Kodak.COM>
   7 *              Ross Biro
   8 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
   9 *
  10 * Fixes:
  11 *              Anonymous       :       NOTSOCK/BADF cleanup. Error fix in
  12 *                                      shutdown()
  13 *              Alan Cox        :       verify_area() fixes
  14 *              Alan Cox        :       Removed DDI
  15 *              Jonathan Kamens :       SOCK_DGRAM reconnect bug
  16 *              Alan Cox        :       Moved a load of checks to the very
  17 *                                      top level.
  18 *              Alan Cox        :       Move address structures to/from user
  19 *                                      mode above the protocol layers.
  20 *              Rob Janssen     :       Allow 0 length sends.
  21 *              Alan Cox        :       Asynchronous I/O support (cribbed from the
  22 *                                      tty drivers).
  23 *              Niibe Yutaka    :       Asynchronous I/O for writes (4.4BSD style)
  24 *              Jeff Uphoff     :       Made max number of sockets command-line
  25 *                                      configurable.
  26 *              Matti Aarnio    :       Made the number of sockets dynamic,
  27 *                                      to be allocated when needed, and mr.
  28 *                                      Uphoff's max is used as max to be
  29 *                                      allowed to allocate.
  30 *              Linus           :       Argh. removed all the socket allocation
  31 *                                      altogether: it's in the inode now.
  32 *              Alan Cox        :       Made sock_alloc()/sock_release() public
  33 *                                      for NetROM and future kernel nfsd type
  34 *                                      stuff.
  35 *              Alan Cox        :       sendmsg/recvmsg basics.
  36 *              Tom Dyas        :       Export net symbols.
  37 *              Marcin Dalecki  :       Fixed problems with CONFIG_NET="n".
  38 *              Alan Cox        :       Added thread locking to sys_* calls
  39 *                                      for sockets. May have errors at the
  40 *                                      moment.
  41 *              Kevin Buhr      :       Fixed the dumb errors in the above.
  42 *              Andi Kleen      :       Some small cleanups, optimizations,
  43 *                                      and fixed a copy_from_user() bug.
  44 *              Tigran Aivazian :       sys_send(args) calls sys_sendto(args, NULL, 0)
  45 *              Tigran Aivazian :       Made listen(2) backlog sanity checks
  46 *                                      protocol-independent
  47 *
  48 *
  49 *              This program is free software; you can redistribute it and/or
  50 *              modify it under the terms of the GNU General Public License
  51 *              as published by the Free Software Foundation; either version
  52 *              2 of the License, or (at your option) any later version.
  53 *
  54 *
  55 *      This module is effectively the top level interface to the BSD socket
  56 *      paradigm.
  57 *
  58 *      Based upon Swansea University Computer Society NET3.039
  59 */
  60
  61#include <linux/mm.h>
  62#include <linux/socket.h>
  63#include <linux/file.h>
  64#include <linux/net.h>
  65#include <linux/interrupt.h>
  66#include <linux/thread_info.h>
  67#include <linux/rcupdate.h>
  68#include <linux/netdevice.h>
  69#include <linux/proc_fs.h>
  70#include <linux/seq_file.h>
  71#include <linux/mutex.h>
  72#include <linux/if_bridge.h>
  73#include <linux/if_frad.h>
  74#include <linux/if_vlan.h>
  75#include <linux/ptp_classify.h>
  76#include <linux/init.h>
  77#include <linux/poll.h>
  78#include <linux/cache.h>
  79#include <linux/module.h>
  80#include <linux/highmem.h>
  81#include <linux/mount.h>
  82#include <linux/security.h>
  83#include <linux/syscalls.h>
  84#include <linux/compat.h>
  85#include <linux/kmod.h>
  86#include <linux/audit.h>
  87#include <linux/wireless.h>
  88#include <linux/nsproxy.h>
  89#include <linux/magic.h>
  90#include <linux/slab.h>
  91#include <linux/xattr.h>
  92
  93#include <asm/uaccess.h>
  94#include <asm/unistd.h>
  95
  96#include <net/compat.h>
  97#include <net/wext.h>
  98#include <net/cls_cgroup.h>
  99
 100#include <net/sock.h>
 101#include <linux/netfilter.h>
 102
 103#include <linux/if_tun.h>
 104#include <linux/ipv6_route.h>
 105#include <linux/route.h>
 106#include <linux/sockios.h>
 107#include <linux/atalk.h>
 108#include <net/busy_poll.h>
 109#include <linux/errqueue.h>
 110
#ifdef CONFIG_NET_RX_BUSY_POLL
/*
 * Busy-poll tunables; only built when CONFIG_NET_RX_BUSY_POLL is enabled.
 * NOTE(review): presumably exported through sysctl and consumed by
 * <net/busy_poll.h> -- neither is visible in this part of the file; confirm.
 */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
 115
/*
 * Forward declarations of the file-operation handlers that are wired into
 * socket_file_ops below.
 */
static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
static int sock_mmap(struct file *file, struct vm_area_struct *vma);

static int sock_close(struct inode *inode, struct file *file);
static unsigned int sock_poll(struct file *file,
			      struct poll_table_struct *wait);
static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
/* Only needed (and only referenced) when CONFIG_COMPAT is set. */
static long compat_sock_ioctl(struct file *file,
			      unsigned int cmd, unsigned long arg);
#endif
static int sock_fasync(int fd, struct file *filp, int on);
static ssize_t sock_sendpage(struct file *file, struct page *page,
			     int offset, size_t size, loff_t *ppos, int more);
static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
				struct pipe_inode_info *pipe, size_t len,
				unsigned int flags);
 134
/*
 *	Socket files have a set of 'special' operations as well as the generic
 *	file ones. These don't appear in the operation structures but are done
 *	directly via the socketcall() multiplexor.
 *
 *	This table is also the identity check for "is this file a socket":
 *	sock_from_file() compares file->f_op against its address.
 */

static const struct file_operations socket_file_ops = {
	.owner =	THIS_MODULE,
	.llseek =	no_llseek,
	.read =		new_sync_read,
	.write =	new_sync_write,
	.read_iter =	sock_read_iter,
	.write_iter =	sock_write_iter,
	.poll =		sock_poll,
	.unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	compat_sock_ioctl,
#endif
	.mmap =		sock_mmap,
	.release =	sock_close,
	.fasync =	sock_fasync,
	.sendpage =	sock_sendpage,
	.splice_write =	generic_splice_sendpage,
	.splice_read =	sock_splice_read,
};
 159
/*
 *	The protocol list. Each protocol is registered in here.
 *
 *	net_families holds one RCU-annotated pointer per protocol family
 *	(NPROTO slots).
 *	NOTE(review): net_family_lock presumably serialises writers of the
 *	table; the registration code is outside this section -- confirm.
 */

static DEFINE_SPINLOCK(net_family_lock);
static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
 166
/*
 *	Statistics counters of the socket lists
 *
 *	Per-CPU count of live sockets: incremented in sock_alloc() and
 *	decremented in sock_release().
 */

static DEFINE_PER_CPU(int, sockets_in_use);
 172
 173/*
 174 * Support routines.
 175 * Move socket addresses back and forth across the kernel/user
 176 * divide and look after the messy bits.
 177 */
 178
 179/**
 180 *      move_addr_to_kernel     -       copy a socket address into kernel space
 181 *      @uaddr: Address in user space
 182 *      @kaddr: Address in kernel space
 183 *      @ulen: Length in user space
 184 *
 185 *      The address is copied into kernel space. If the provided address is
 186 *      too long an error code of -EINVAL is returned. If the copy gives
 187 *      invalid addresses -EFAULT is returned. On a success 0 is returned.
 188 */
 189
 190int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
 191{
 192        if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
 193                return -EINVAL;
 194        if (ulen == 0)
 195                return 0;
 196        if (copy_from_user(kaddr, uaddr, ulen))
 197                return -EFAULT;
 198        return audit_sockaddr(ulen, kaddr);
 199}
 200
 201/**
 202 *      move_addr_to_user       -       copy an address to user space
 203 *      @kaddr: kernel space address
 204 *      @klen: length of address in kernel
 205 *      @uaddr: user space address
 206 *      @ulen: pointer to user length field
 207 *
 208 *      The value pointed to by ulen on entry is the buffer length available.
 209 *      This is overwritten with the buffer space used. -EINVAL is returned
 210 *      if an overlong buffer is specified or a negative buffer size. -EFAULT
 211 *      is returned if either the buffer or the length field are not
 212 *      accessible.
 213 *      After copying the data up to the limit the user specifies, the true
 214 *      length of the data is written over the length limit the user
 215 *      specified. Zero is returned for a success.
 216 */
 217
 218static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
 219                             void __user *uaddr, int __user *ulen)
 220{
 221        int err;
 222        int len;
 223
 224        BUG_ON(klen > sizeof(struct sockaddr_storage));
 225        err = get_user(len, ulen);
 226        if (err)
 227                return err;
 228        if (len > klen)
 229                len = klen;
 230        if (len < 0)
 231                return -EINVAL;
 232        if (len) {
 233                if (audit_sockaddr(klen, kaddr))
 234                        return -ENOMEM;
 235                if (copy_to_user(uaddr, kaddr, len))
 236                        return -EFAULT;
 237        }
 238        /*
 239         *      "fromlen shall refer to the value before truncation.."
 240         *                      1003.1g
 241         */
 242        return __put_user(klen, ulen);
 243}
 244
 245static struct kmem_cache *sock_inode_cachep __read_mostly;
 246
 247static struct inode *sock_alloc_inode(struct super_block *sb)
 248{
 249        struct socket_alloc *ei;
 250        struct socket_wq *wq;
 251
 252        ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
 253        if (!ei)
 254                return NULL;
 255        wq = kmalloc(sizeof(*wq), GFP_KERNEL);
 256        if (!wq) {
 257                kmem_cache_free(sock_inode_cachep, ei);
 258                return NULL;
 259        }
 260        init_waitqueue_head(&wq->wait);
 261        wq->fasync_list = NULL;
 262        RCU_INIT_POINTER(ei->socket.wq, wq);
 263
 264        ei->socket.state = SS_UNCONNECTED;
 265        ei->socket.flags = 0;
 266        ei->socket.ops = NULL;
 267        ei->socket.sk = NULL;
 268        ei->socket.file = NULL;
 269
 270        return &ei->vfs_inode;
 271}
 272
 273static void sock_destroy_inode(struct inode *inode)
 274{
 275        struct socket_alloc *ei;
 276        struct socket_wq *wq;
 277
 278        ei = container_of(inode, struct socket_alloc, vfs_inode);
 279        wq = rcu_dereference_protected(ei->socket.wq, 1);
 280        kfree_rcu(wq, rcu);
 281        kmem_cache_free(sock_inode_cachep, ei);
 282}
 283
 284static void init_once(void *foo)
 285{
 286        struct socket_alloc *ei = (struct socket_alloc *)foo;
 287
 288        inode_init_once(&ei->vfs_inode);
 289}
 290
 291static int init_inodecache(void)
 292{
 293        sock_inode_cachep = kmem_cache_create("sock_inode_cache",
 294                                              sizeof(struct socket_alloc),
 295                                              0,
 296                                              (SLAB_HWCACHE_ALIGN |
 297                                               SLAB_RECLAIM_ACCOUNT |
 298                                               SLAB_MEM_SPREAD),
 299                                              init_once);
 300        if (sock_inode_cachep == NULL)
 301                return -ENOMEM;
 302        return 0;
 303}
 304
/*
 * Superblock operations for sockfs: inodes are allocated and destroyed by
 * the socket-specific helpers above; statfs uses the generic simple_statfs.
 */
static const struct super_operations sockfs_ops = {
	.alloc_inode	= sock_alloc_inode,
	.destroy_inode	= sock_destroy_inode,
	.statfs		= simple_statfs,
};
 310
 311/*
 312 * sockfs_dname() is called from d_path().
 313 */
 314static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
 315{
 316        return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
 317                                dentry->d_inode->i_ino);
 318}
 319
/* Dentry operations for sockfs; only the dynamic name generator is provided. */
static const struct dentry_operations sockfs_dentry_operations = {
	.d_dname  = sockfs_dname,
};
 323
 324static struct dentry *sockfs_mount(struct file_system_type *fs_type,
 325                         int flags, const char *dev_name, void *data)
 326{
 327        return mount_pseudo(fs_type, "socket:", &sockfs_ops,
 328                &sockfs_dentry_operations, SOCKFS_MAGIC);
 329}
 330
/*
 * The sockfs mount. Its superblock is what sock_alloc() and
 * sock_alloc_file() allocate inodes and dentries from.
 * NOTE(review): assigned during initialisation outside this section.
 */
static struct vfsmount *sock_mnt __read_mostly;

/* Filesystem type descriptor for the "sockfs" pseudo filesystem. */
static struct file_system_type sock_fs_type = {
	.name =		"sockfs",
	.mount =	sockfs_mount,
	.kill_sb =	kill_anon_super,
};
 338
 339/*
 340 *      Obtains the first available file descriptor and sets it up for use.
 341 *
 342 *      These functions create file structures and maps them to fd space
 343 *      of the current process. On success it returns file descriptor
 344 *      and file struct implicitly stored in sock->file.
 345 *      Note that another thread may close file descriptor before we return
 346 *      from this function. We use the fact that now we do not refer
 347 *      to socket after mapping. If one day we will need it, this
 348 *      function will increment ref. count on file by 1.
 349 *
 350 *      In any case returned fd MAY BE not valid!
 351 *      This race condition is unavoidable
 352 *      with shared fd spaces, we cannot solve it inside kernel,
 353 *      but we take care of internal coherence yet.
 354 */
 355
 356struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 357{
 358        struct qstr name = { .name = "" };
 359        struct path path;
 360        struct file *file;
 361
 362        if (dname) {
 363                name.name = dname;
 364                name.len = strlen(name.name);
 365        } else if (sock->sk) {
 366                name.name = sock->sk->sk_prot_creator->name;
 367                name.len = strlen(name.name);
 368        }
 369        path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
 370        if (unlikely(!path.dentry))
 371                return ERR_PTR(-ENOMEM);
 372        path.mnt = mntget(sock_mnt);
 373
 374        d_instantiate(path.dentry, SOCK_INODE(sock));
 375
 376        file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
 377                  &socket_file_ops);
 378        if (unlikely(IS_ERR(file))) {
 379                /* drop dentry, keep inode */
 380                ihold(path.dentry->d_inode);
 381                path_put(&path);
 382                return file;
 383        }
 384
 385        sock->file = file;
 386        file->f_flags = O_RDWR | (flags & O_NONBLOCK);
 387        file->private_data = sock;
 388        return file;
 389}
 390EXPORT_SYMBOL(sock_alloc_file);
 391
 392static int sock_map_fd(struct socket *sock, int flags)
 393{
 394        struct file *newfile;
 395        int fd = get_unused_fd_flags(flags);
 396        if (unlikely(fd < 0))
 397                return fd;
 398
 399        newfile = sock_alloc_file(sock, flags, NULL);
 400        if (likely(!IS_ERR(newfile))) {
 401                fd_install(fd, newfile);
 402                return fd;
 403        }
 404
 405        put_unused_fd(fd);
 406        return PTR_ERR(newfile);
 407}
 408
 409struct socket *sock_from_file(struct file *file, int *err)
 410{
 411        if (file->f_op == &socket_file_ops)
 412                return file->private_data;      /* set in sock_map_fd */
 413
 414        *err = -ENOTSOCK;
 415        return NULL;
 416}
 417EXPORT_SYMBOL(sock_from_file);
 418
 419/**
 420 *      sockfd_lookup - Go from a file number to its socket slot
 421 *      @fd: file handle
 422 *      @err: pointer to an error code return
 423 *
 424 *      The file handle passed in is locked and the socket it is bound
 425 *      too is returned. If an error occurs the err pointer is overwritten
 426 *      with a negative errno code and NULL is returned. The function checks
 427 *      for both invalid handles and passing a handle which is not a socket.
 428 *
 429 *      On a success the socket object pointer is returned.
 430 */
 431
 432struct socket *sockfd_lookup(int fd, int *err)
 433{
 434        struct file *file;
 435        struct socket *sock;
 436
 437        file = fget(fd);
 438        if (!file) {
 439                *err = -EBADF;
 440                return NULL;
 441        }
 442
 443        sock = sock_from_file(file, err);
 444        if (!sock)
 445                fput(file);
 446        return sock;
 447}
 448EXPORT_SYMBOL(sockfd_lookup);
 449
 450static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
 451{
 452        struct fd f = fdget(fd);
 453        struct socket *sock;
 454
 455        *err = -EBADF;
 456        if (f.file) {
 457                sock = sock_from_file(f.file, err);
 458                if (likely(sock)) {
 459                        *fput_needed = f.flags;
 460                        return sock;
 461                }
 462                fdput(f);
 463        }
 464        return NULL;
 465}
 466
 467#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
 468#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
 469#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
 470static ssize_t sockfs_getxattr(struct dentry *dentry,
 471                               const char *name, void *value, size_t size)
 472{
 473        const char *proto_name;
 474        size_t proto_size;
 475        int error;
 476
 477        error = -ENODATA;
 478        if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
 479                proto_name = dentry->d_name.name;
 480                proto_size = strlen(proto_name);
 481
 482                if (value) {
 483                        error = -ERANGE;
 484                        if (proto_size + 1 > size)
 485                                goto out;
 486
 487                        strncpy(value, proto_name, proto_size + 1);
 488                }
 489                error = proto_size + 1;
 490        }
 491
 492out:
 493        return error;
 494}
 495
 496static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
 497                                size_t size)
 498{
 499        ssize_t len;
 500        ssize_t used = 0;
 501
 502        len = security_inode_listsecurity(dentry->d_inode, buffer, size);
 503        if (len < 0)
 504                return len;
 505        used += len;
 506        if (buffer) {
 507                if (size < used)
 508                        return -ERANGE;
 509                buffer += len;
 510        }
 511
 512        len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
 513        used += len;
 514        if (buffer) {
 515                if (size < used)
 516                        return -ERANGE;
 517                memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
 518                buffer += len;
 519        }
 520
 521        return used;
 522}
 523
/* Inode operations for socket inodes (installed by sock_alloc()): xattrs only. */
static const struct inode_operations sockfs_inode_ops = {
	.getxattr = sockfs_getxattr,
	.listxattr = sockfs_listxattr,
};
 528
 529/**
 530 *      sock_alloc      -       allocate a socket
 531 *
 532 *      Allocate a new inode and socket object. The two are bound together
 533 *      and initialised. The socket is then returned. If we are out of inodes
 534 *      NULL is returned.
 535 */
 536
 537static struct socket *sock_alloc(void)
 538{
 539        struct inode *inode;
 540        struct socket *sock;
 541
 542        inode = new_inode_pseudo(sock_mnt->mnt_sb);
 543        if (!inode)
 544                return NULL;
 545
 546        sock = SOCKET_I(inode);
 547
 548        kmemcheck_annotate_bitfield(sock, type);
 549        inode->i_ino = get_next_ino();
 550        inode->i_mode = S_IFSOCK | S_IRWXUGO;
 551        inode->i_uid = current_fsuid();
 552        inode->i_gid = current_fsgid();
 553        inode->i_op = &sockfs_inode_ops;
 554
 555        this_cpu_add(sockets_in_use, 1);
 556        return sock;
 557}
 558
 559/**
 560 *      sock_release    -       close a socket
 561 *      @sock: socket to close
 562 *
 563 *      The socket is released from the protocol stack if it has a release
 564 *      callback, and the inode is then released if the socket is bound to
 565 *      an inode not a file.
 566 */
 567
 568void sock_release(struct socket *sock)
 569{
 570        if (sock->ops) {
 571                struct module *owner = sock->ops->owner;
 572
 573                sock->ops->release(sock);
 574                sock->ops = NULL;
 575                module_put(owner);
 576        }
 577
 578        if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
 579                pr_err("%s: fasync list not empty!\n", __func__);
 580
 581        if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
 582                return;
 583
 584        this_cpu_sub(sockets_in_use, 1);
 585        if (!sock->file) {
 586                iput(SOCK_INODE(sock));
 587                return;
 588        }
 589        sock->file = NULL;
 590}
 591EXPORT_SYMBOL(sock_release);
 592
 593void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
 594{
 595        u8 flags = *tx_flags;
 596
 597        if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
 598                flags |= SKBTX_HW_TSTAMP;
 599
 600        if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
 601                flags |= SKBTX_SW_TSTAMP;
 602
 603        if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
 604                flags |= SKBTX_SCHED_TSTAMP;
 605
 606        if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)
 607                flags |= SKBTX_ACK_TSTAMP;
 608
 609        *tx_flags = flags;
 610}
 611EXPORT_SYMBOL(__sock_tx_timestamp);
 612
 613static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
 614                                       struct msghdr *msg, size_t size)
 615{
 616        return sock->ops->sendmsg(iocb, sock, msg, size);
 617}
 618
 619static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 620                                 struct msghdr *msg, size_t size)
 621{
 622        int err = security_socket_sendmsg(sock, msg, size);
 623
 624        return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
 625}
 626
 627static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 628                           size_t size, bool nosec)
 629{
 630        struct kiocb iocb;
 631        int ret;
 632
 633        init_sync_kiocb(&iocb, NULL);
 634        ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) :
 635                      __sock_sendmsg(&iocb, sock, msg, size);
 636        if (-EIOCBQUEUED == ret)
 637                ret = wait_on_sync_kiocb(&iocb);
 638        return ret;
 639}
 640
 641int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 642{
 643        return do_sock_sendmsg(sock, msg, size, false);
 644}
 645EXPORT_SYMBOL(sock_sendmsg);
 646
 647static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
 648{
 649        return do_sock_sendmsg(sock, msg, size, true);
 650}
 651
 652int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 653                   struct kvec *vec, size_t num, size_t size)
 654{
 655        mm_segment_t oldfs = get_fs();
 656        int result;
 657
 658        set_fs(KERNEL_DS);
 659        /*
 660         * the following is safe, since for compiler definitions of kvec and
 661         * iovec are identical, yielding the same in-core layout and alignment
 662         */
 663        iov_iter_init(&msg->msg_iter, WRITE, (struct iovec *)vec, num, size);
 664        result = sock_sendmsg(sock, msg, size);
 665        set_fs(oldfs);
 666        return result;
 667}
 668EXPORT_SYMBOL(kernel_sendmsg);
 669
 670/*
 671 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
 672 */
 673void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 674        struct sk_buff *skb)
 675{
 676        int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
 677        struct scm_timestamping tss;
 678        int empty = 1;
 679        struct skb_shared_hwtstamps *shhwtstamps =
 680                skb_hwtstamps(skb);
 681
 682        /* Race occurred between timestamp enabling and packet
 683           receiving.  Fill in the current time for now. */
 684        if (need_software_tstamp && skb->tstamp.tv64 == 0)
 685                __net_timestamp(skb);
 686
 687        if (need_software_tstamp) {
 688                if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
 689                        struct timeval tv;
 690                        skb_get_timestamp(skb, &tv);
 691                        put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
 692                                 sizeof(tv), &tv);
 693                } else {
 694                        struct timespec ts;
 695                        skb_get_timestampns(skb, &ts);
 696                        put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
 697                                 sizeof(ts), &ts);
 698                }
 699        }
 700
 701        memset(&tss, 0, sizeof(tss));
 702        if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
 703            ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
 704                empty = 0;
 705        if (shhwtstamps &&
 706            (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
 707            ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
 708                empty = 0;
 709        if (!empty)
 710                put_cmsg(msg, SOL_SOCKET,
 711                         SCM_TIMESTAMPING, sizeof(tss), &tss);
 712}
 713EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
 714
 715void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
 716        struct sk_buff *skb)
 717{
 718        int ack;
 719
 720        if (!sock_flag(sk, SOCK_WIFI_STATUS))
 721                return;
 722        if (!skb->wifi_acked_valid)
 723                return;
 724
 725        ack = skb->wifi_acked;
 726
 727        put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
 728}
 729EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
 730
 731static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
 732                                   struct sk_buff *skb)
 733{
 734        if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
 735                put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
 736                        sizeof(__u32), &skb->dropcount);
 737}
 738
 739void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
 740        struct sk_buff *skb)
 741{
 742        sock_recv_timestamp(msg, sk, skb);
 743        sock_recv_drops(msg, sk, skb);
 744}
 745EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
 746
 747static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
 748                                       struct msghdr *msg, size_t size, int flags)
 749{
 750        return sock->ops->recvmsg(iocb, sock, msg, size, flags);
 751}
 752
 753static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 754                                 struct msghdr *msg, size_t size, int flags)
 755{
 756        int err = security_socket_recvmsg(sock, msg, size, flags);
 757
 758        return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
 759}
 760
 761int sock_recvmsg(struct socket *sock, struct msghdr *msg,
 762                 size_t size, int flags)
 763{
 764        struct kiocb iocb;
 765        int ret;
 766
 767        init_sync_kiocb(&iocb, NULL);
 768        ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
 769        if (-EIOCBQUEUED == ret)
 770                ret = wait_on_sync_kiocb(&iocb);
 771        return ret;
 772}
 773EXPORT_SYMBOL(sock_recvmsg);
 774
 775static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
 776                              size_t size, int flags)
 777{
 778        struct kiocb iocb;
 779        int ret;
 780
 781        init_sync_kiocb(&iocb, NULL);
 782        ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
 783        if (-EIOCBQUEUED == ret)
 784                ret = wait_on_sync_kiocb(&iocb);
 785        return ret;
 786}
 787
 788/**
 789 * kernel_recvmsg - Receive a message from a socket (kernel space)
 790 * @sock:       The socket to receive the message from
 791 * @msg:        Received message
 792 * @vec:        Input s/g array for message data
 793 * @num:        Size of input s/g array
 794 * @size:       Number of bytes to read
 795 * @flags:      Message flags (MSG_DONTWAIT, etc...)
 796 *
 797 * On return the msg structure contains the scatter/gather array passed in the
 798 * vec argument. The array is modified so that it consists of the unfilled
 799 * portion of the original array.
 800 *
 801 * The returned value is the total number of bytes received, or an error.
 802 */
 803int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 804                   struct kvec *vec, size_t num, size_t size, int flags)
 805{
 806        mm_segment_t oldfs = get_fs();
 807        int result;
 808
 809        set_fs(KERNEL_DS);
 810        /*
 811         * the following is safe, since for compiler definitions of kvec and
 812         * iovec are identical, yielding the same in-core layout and alignment
 813         */
 814        iov_iter_init(&msg->msg_iter, READ, (struct iovec *)vec, num, size);
 815        result = sock_recvmsg(sock, msg, size, flags);
 816        set_fs(oldfs);
 817        return result;
 818}
 819EXPORT_SYMBOL(kernel_recvmsg);
 820
 821static ssize_t sock_sendpage(struct file *file, struct page *page,
 822                             int offset, size_t size, loff_t *ppos, int more)
 823{
 824        struct socket *sock;
 825        int flags;
 826
 827        sock = file->private_data;
 828
 829        flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 830        /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
 831        flags |= more;
 832
 833        return kernel_sendpage(sock, page, offset, size, flags);
 834}
 835
 836static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
 837                                struct pipe_inode_info *pipe, size_t len,
 838                                unsigned int flags)
 839{
 840        struct socket *sock = file->private_data;
 841
 842        if (unlikely(!sock->ops->splice_read))
 843                return -EINVAL;
 844
 845        return sock->ops->splice_read(sock, ppos, pipe, len, flags);
 846}
 847
 848static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
 849{
 850        struct file *file = iocb->ki_filp;
 851        struct socket *sock = file->private_data;
 852        struct msghdr msg = {.msg_iter = *to};
 853        ssize_t res;
 854
 855        if (file->f_flags & O_NONBLOCK)
 856                msg.msg_flags = MSG_DONTWAIT;
 857
 858        if (iocb->ki_pos != 0)
 859                return -ESPIPE;
 860
 861        if (iocb->ki_nbytes == 0)       /* Match SYS5 behaviour */
 862                return 0;
 863
 864        res = __sock_recvmsg(iocb, sock, &msg,
 865                             iocb->ki_nbytes, msg.msg_flags);
 866        *to = msg.msg_iter;
 867        return res;
 868}
 869
 870static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
 871{
 872        struct file *file = iocb->ki_filp;
 873        struct socket *sock = file->private_data;
 874        struct msghdr msg = {.msg_iter = *from};
 875        ssize_t res;
 876
 877        if (iocb->ki_pos != 0)
 878                return -ESPIPE;
 879
 880        if (file->f_flags & O_NONBLOCK)
 881                msg.msg_flags = MSG_DONTWAIT;
 882
 883        if (sock->type == SOCK_SEQPACKET)
 884                msg.msg_flags |= MSG_EOR;
 885
 886        res = __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes);
 887        *from = msg.msg_iter;
 888        return res;
 889}
 890
 891/*
 892 * Atomic setting of ioctl hooks to avoid race
 893 * with module unload.
 894 */
 895
 896static DEFINE_MUTEX(br_ioctl_mutex);
 897static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
 898
 899void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
 900{
 901        mutex_lock(&br_ioctl_mutex);
 902        br_ioctl_hook = hook;
 903        mutex_unlock(&br_ioctl_mutex);
 904}
 905EXPORT_SYMBOL(brioctl_set);
 906
 907static DEFINE_MUTEX(vlan_ioctl_mutex);
 908static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
 909
 910void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
 911{
 912        mutex_lock(&vlan_ioctl_mutex);
 913        vlan_ioctl_hook = hook;
 914        mutex_unlock(&vlan_ioctl_mutex);
 915}
 916EXPORT_SYMBOL(vlan_ioctl_set);
 917
 918static DEFINE_MUTEX(dlci_ioctl_mutex);
 919static int (*dlci_ioctl_hook) (unsigned int, void __user *);
 920
 921void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
 922{
 923        mutex_lock(&dlci_ioctl_mutex);
 924        dlci_ioctl_hook = hook;
 925        mutex_unlock(&dlci_ioctl_mutex);
 926}
 927EXPORT_SYMBOL(dlci_ioctl_set);
 928
 929static long sock_do_ioctl(struct net *net, struct socket *sock,
 930                                 unsigned int cmd, unsigned long arg)
 931{
 932        int err;
 933        void __user *argp = (void __user *)arg;
 934
 935        err = sock->ops->ioctl(sock, cmd, arg);
 936
 937        /*
 938         * If this ioctl is unknown try to hand it down
 939         * to the NIC driver.
 940         */
 941        if (err == -ENOIOCTLCMD)
 942                err = dev_ioctl(net, cmd, argp);
 943
 944        return err;
 945}
 946
 947/*
 948 *      With an ioctl, arg may well be a user mode pointer, but we don't know
 949 *      what to do with it - that's up to the protocol still.
 950 */
 951
 952static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 953{
 954        struct socket *sock;
 955        struct sock *sk;
 956        void __user *argp = (void __user *)arg;
 957        int pid, err;
 958        struct net *net;
 959
 960        sock = file->private_data;
 961        sk = sock->sk;
 962        net = sock_net(sk);
 963        if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
 964                err = dev_ioctl(net, cmd, argp);
 965        } else
 966#ifdef CONFIG_WEXT_CORE
 967        if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
 968                err = dev_ioctl(net, cmd, argp);
 969        } else
 970#endif
 971                switch (cmd) {
 972                case FIOSETOWN:
 973                case SIOCSPGRP:
 974                        err = -EFAULT;
 975                        if (get_user(pid, (int __user *)argp))
 976                                break;
 977                        f_setown(sock->file, pid, 1);
 978                        err = 0;
 979                        break;
 980                case FIOGETOWN:
 981                case SIOCGPGRP:
 982                        err = put_user(f_getown(sock->file),
 983                                       (int __user *)argp);
 984                        break;
 985                case SIOCGIFBR:
 986                case SIOCSIFBR:
 987                case SIOCBRADDBR:
 988                case SIOCBRDELBR:
 989                        err = -ENOPKG;
 990                        if (!br_ioctl_hook)
 991                                request_module("bridge");
 992
 993                        mutex_lock(&br_ioctl_mutex);
 994                        if (br_ioctl_hook)
 995                                err = br_ioctl_hook(net, cmd, argp);
 996                        mutex_unlock(&br_ioctl_mutex);
 997                        break;
 998                case SIOCGIFVLAN:
 999                case SIOCSIFVLAN:
1000                        err = -ENOPKG;
1001                        if (!vlan_ioctl_hook)
1002                                request_module("8021q");
1003
1004                        mutex_lock(&vlan_ioctl_mutex);
1005                        if (vlan_ioctl_hook)
1006                                err = vlan_ioctl_hook(net, argp);
1007                        mutex_unlock(&vlan_ioctl_mutex);
1008                        break;
1009                case SIOCADDDLCI:
1010                case SIOCDELDLCI:
1011                        err = -ENOPKG;
1012                        if (!dlci_ioctl_hook)
1013                                request_module("dlci");
1014
1015                        mutex_lock(&dlci_ioctl_mutex);
1016                        if (dlci_ioctl_hook)
1017                                err = dlci_ioctl_hook(cmd, argp);
1018                        mutex_unlock(&dlci_ioctl_mutex);
1019                        break;
1020                default:
1021                        err = sock_do_ioctl(net, sock, cmd, arg);
1022                        break;
1023                }
1024        return err;
1025}
1026
1027int sock_create_lite(int family, int type, int protocol, struct socket **res)
1028{
1029        int err;
1030        struct socket *sock = NULL;
1031
1032        err = security_socket_create(family, type, protocol, 1);
1033        if (err)
1034                goto out;
1035
1036        sock = sock_alloc();
1037        if (!sock) {
1038                err = -ENOMEM;
1039                goto out;
1040        }
1041
1042        sock->type = type;
1043        err = security_socket_post_create(sock, family, type, protocol, 1);
1044        if (err)
1045                goto out_release;
1046
1047out:
1048        *res = sock;
1049        return err;
1050out_release:
1051        sock_release(sock);
1052        sock = NULL;
1053        goto out;
1054}
1055EXPORT_SYMBOL(sock_create_lite);
1056
1057/* No kernel lock held - perfect */
1058static unsigned int sock_poll(struct file *file, poll_table *wait)
1059{
1060        unsigned int busy_flag = 0;
1061        struct socket *sock;
1062
1063        /*
1064         *      We can't return errors to poll, so it's either yes or no.
1065         */
1066        sock = file->private_data;
1067
1068        if (sk_can_busy_loop(sock->sk)) {
1069                /* this socket can poll_ll so tell the system call */
1070                busy_flag = POLL_BUSY_LOOP;
1071
1072                /* once, only if requested by syscall */
1073                if (wait && (wait->_key & POLL_BUSY_LOOP))
1074                        sk_busy_loop(sock->sk, 1);
1075        }
1076
1077        return busy_flag | sock->ops->poll(file, sock, wait);
1078}
1079
1080static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1081{
1082        struct socket *sock = file->private_data;
1083
1084        return sock->ops->mmap(file, sock, vma);
1085}
1086
/*
 * ->release() style handler: release the socket embedded in @inode.
 * @filp is not used.  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1092
1093/*
1094 *      Update the socket async list
1095 *
1096 *      Fasync_list locking strategy.
1097 *
1098 *      1. fasync_list is modified only under process context socket lock
1099 *         i.e. under semaphore.
1100 *      2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1101 *         or under socket lock
1102 */
1103
1104static int sock_fasync(int fd, struct file *filp, int on)
1105{
1106        struct socket *sock = filp->private_data;
1107        struct sock *sk = sock->sk;
1108        struct socket_wq *wq;
1109
1110        if (sk == NULL)
1111                return -EINVAL;
1112
1113        lock_sock(sk);
1114        wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1115        fasync_helper(fd, filp, on, &wq->fasync_list);
1116
1117        if (!wq->fasync_list)
1118                sock_reset_flag(sk, SOCK_FASYNC);
1119        else
1120                sock_set_flag(sk, SOCK_FASYNC);
1121
1122        release_sock(sk);
1123        return 0;
1124}
1125
1126/* This function may be called only under socket lock or callback_lock or rcu_lock */
1127
1128int sock_wake_async(struct socket *sock, int how, int band)
1129{
1130        struct socket_wq *wq;
1131
1132        if (!sock)
1133                return -1;
1134        rcu_read_lock();
1135        wq = rcu_dereference(sock->wq);
1136        if (!wq || !wq->fasync_list) {
1137                rcu_read_unlock();
1138                return -1;
1139        }
1140        switch (how) {
1141        case SOCK_WAKE_WAITD:
1142                if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1143                        break;
1144                goto call_kill;
1145        case SOCK_WAKE_SPACE:
1146                if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1147                        break;
1148                /* fall through */
1149        case SOCK_WAKE_IO:
1150call_kill:
1151                kill_fasync(&wq->fasync_list, SIGIO, band);
1152                break;
1153        case SOCK_WAKE_URG:
1154                kill_fasync(&wq->fasync_list, SIGURG, band);
1155        }
1156        rcu_read_unlock();
1157        return 0;
1158}
1159EXPORT_SYMBOL(sock_wake_async);
1160
1161int __sock_create(struct net *net, int family, int type, int protocol,
1162                         struct socket **res, int kern)
1163{
1164        int err;
1165        struct socket *sock;
1166        const struct net_proto_family *pf;
1167
1168        /*
1169         *      Check protocol is in range
1170         */
1171        if (family < 0 || family >= NPROTO)
1172                return -EAFNOSUPPORT;
1173        if (type < 0 || type >= SOCK_MAX)
1174                return -EINVAL;
1175
1176        /* Compatibility.
1177
1178           This uglymoron is moved from INET layer to here to avoid
1179           deadlock in module load.
1180         */
1181        if (family == PF_INET && type == SOCK_PACKET) {
1182                static int warned;
1183                if (!warned) {
1184                        warned = 1;
1185                        pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1186                                current->comm);
1187                }
1188                family = PF_PACKET;
1189        }
1190
1191        err = security_socket_create(family, type, protocol, kern);
1192        if (err)
1193                return err;
1194
1195        /*
1196         *      Allocate the socket and allow the family to set things up. if
1197         *      the protocol is 0, the family is instructed to select an appropriate
1198         *      default.
1199         */
1200        sock = sock_alloc();
1201        if (!sock) {
1202                net_warn_ratelimited("socket: no more sockets\n");
1203                return -ENFILE; /* Not exactly a match, but its the
1204                                   closest posix thing */
1205        }
1206
1207        sock->type = type;
1208
1209#ifdef CONFIG_MODULES
1210        /* Attempt to load a protocol module if the find failed.
1211         *
1212         * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1213         * requested real, full-featured networking support upon configuration.
1214         * Otherwise module support will break!
1215         */
1216        if (rcu_access_pointer(net_families[family]) == NULL)
1217                request_module("net-pf-%d", family);
1218#endif
1219
1220        rcu_read_lock();
1221        pf = rcu_dereference(net_families[family]);
1222        err = -EAFNOSUPPORT;
1223        if (!pf)
1224                goto out_release;
1225
1226        /*
1227         * We will call the ->create function, that possibly is in a loadable
1228         * module, so we have to bump that loadable module refcnt first.
1229         */
1230        if (!try_module_get(pf->owner))
1231                goto out_release;
1232
1233        /* Now protected by module ref count */
1234        rcu_read_unlock();
1235
1236        err = pf->create(net, sock, protocol, kern);
1237        if (err < 0)
1238                goto out_module_put;
1239
1240        /*
1241         * Now to bump the refcnt of the [loadable] module that owns this
1242         * socket at sock_release time we decrement its refcnt.
1243         */
1244        if (!try_module_get(sock->ops->owner))
1245                goto out_module_busy;
1246
1247        /*
1248         * Now that we're done with the ->create function, the [loadable]
1249         * module can have its refcnt decremented
1250         */
1251        module_put(pf->owner);
1252        err = security_socket_post_create(sock, family, type, protocol, kern);
1253        if (err)
1254                goto out_sock_release;
1255        *res = sock;
1256
1257        return 0;
1258
1259out_module_busy:
1260        err = -EAFNOSUPPORT;
1261out_module_put:
1262        sock->ops = NULL;
1263        module_put(pf->owner);
1264out_sock_release:
1265        sock_release(sock);
1266        return err;
1267
1268out_release:
1269        rcu_read_unlock();
1270        goto out_sock_release;
1271}
1272EXPORT_SYMBOL(__sock_create);
1273
1274int sock_create(int family, int type, int protocol, struct socket **res)
1275{
1276        return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1277}
1278EXPORT_SYMBOL(sock_create);
1279
1280int sock_create_kern(int family, int type, int protocol, struct socket **res)
1281{
1282        return __sock_create(&init_net, family, type, protocol, res, 1);
1283}
1284EXPORT_SYMBOL(sock_create_kern);
1285
1286SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1287{
1288        int retval;
1289        struct socket *sock;
1290        int flags;
1291
1292        /* Check the SOCK_* constants for consistency.  */
1293        BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1294        BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1295        BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1296        BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1297
1298        flags = type & ~SOCK_TYPE_MASK;
1299        if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1300                return -EINVAL;
1301        type &= SOCK_TYPE_MASK;
1302
1303        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1304                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1305
1306        retval = sock_create(family, type, protocol, &sock);
1307        if (retval < 0)
1308                goto out;
1309
1310        retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1311        if (retval < 0)
1312                goto out_release;
1313
1314out:
1315        /* It may be already another descriptor 8) Not kernel problem. */
1316        return retval;
1317
1318out_release:
1319        sock_release(sock);
1320        return retval;
1321}
1322
1323/*
1324 *      Create a pair of connected sockets.
1325 */
1326
1327SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1328                int __user *, usockvec)
1329{
1330        struct socket *sock1, *sock2;
1331        int fd1, fd2, err;
1332        struct file *newfile1, *newfile2;
1333        int flags;
1334
1335        flags = type & ~SOCK_TYPE_MASK;
1336        if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1337                return -EINVAL;
1338        type &= SOCK_TYPE_MASK;
1339
1340        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1341                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1342
1343        /*
1344         * Obtain the first socket and check if the underlying protocol
1345         * supports the socketpair call.
1346         */
1347
1348        err = sock_create(family, type, protocol, &sock1);
1349        if (err < 0)
1350                goto out;
1351
1352        err = sock_create(family, type, protocol, &sock2);
1353        if (err < 0)
1354                goto out_release_1;
1355
1356        err = sock1->ops->socketpair(sock1, sock2);
1357        if (err < 0)
1358                goto out_release_both;
1359
1360        fd1 = get_unused_fd_flags(flags);
1361        if (unlikely(fd1 < 0)) {
1362                err = fd1;
1363                goto out_release_both;
1364        }
1365
1366        fd2 = get_unused_fd_flags(flags);
1367        if (unlikely(fd2 < 0)) {
1368                err = fd2;
1369                goto out_put_unused_1;
1370        }
1371
1372        newfile1 = sock_alloc_file(sock1, flags, NULL);
1373        if (unlikely(IS_ERR(newfile1))) {
1374                err = PTR_ERR(newfile1);
1375                goto out_put_unused_both;
1376        }
1377
1378        newfile2 = sock_alloc_file(sock2, flags, NULL);
1379        if (IS_ERR(newfile2)) {
1380                err = PTR_ERR(newfile2);
1381                goto out_fput_1;
1382        }
1383
1384        err = put_user(fd1, &usockvec[0]);
1385        if (err)
1386                goto out_fput_both;
1387
1388        err = put_user(fd2, &usockvec[1]);
1389        if (err)
1390                goto out_fput_both;
1391
1392        audit_fd_pair(fd1, fd2);
1393
1394        fd_install(fd1, newfile1);
1395        fd_install(fd2, newfile2);
1396        /* fd1 and fd2 may be already another descriptors.
1397         * Not kernel problem.
1398         */
1399
1400        return 0;
1401
1402out_fput_both:
1403        fput(newfile2);
1404        fput(newfile1);
1405        put_unused_fd(fd2);
1406        put_unused_fd(fd1);
1407        goto out;
1408
1409out_fput_1:
1410        fput(newfile1);
1411        put_unused_fd(fd2);
1412        put_unused_fd(fd1);
1413        sock_release(sock2);
1414        goto out;
1415
1416out_put_unused_both:
1417        put_unused_fd(fd2);
1418out_put_unused_1:
1419        put_unused_fd(fd1);
1420out_release_both:
1421        sock_release(sock2);
1422out_release_1:
1423        sock_release(sock1);
1424out:
1425        return err;
1426}
1427
1428/*
1429 *      Bind a name to a socket. Nothing much to do here since it's
1430 *      the protocol's responsibility to handle the local address.
1431 *
1432 *      We move the socket address to kernel space before we call
1433 *      the protocol layer (having also checked the address is ok).
1434 */
1435
1436SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1437{
1438        struct socket *sock;
1439        struct sockaddr_storage address;
1440        int err, fput_needed;
1441
1442        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1443        if (sock) {
1444                err = move_addr_to_kernel(umyaddr, addrlen, &address);
1445                if (err >= 0) {
1446                        err = security_socket_bind(sock,
1447                                                   (struct sockaddr *)&address,
1448                                                   addrlen);
1449                        if (!err)
1450                                err = sock->ops->bind(sock,
1451                                                      (struct sockaddr *)
1452                                                      &address, addrlen);
1453                }
1454                fput_light(sock->file, fput_needed);
1455        }
1456        return err;
1457}
1458
1459/*
1460 *      Perform a listen. Basically, we allow the protocol to do anything
1461 *      necessary for a listen, and if that works, we mark the socket as
1462 *      ready for listening.
1463 */
1464
1465SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1466{
1467        struct socket *sock;
1468        int err, fput_needed;
1469        int somaxconn;
1470
1471        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1472        if (sock) {
1473                somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
1474                if ((unsigned int)backlog > somaxconn)
1475                        backlog = somaxconn;
1476
1477                err = security_socket_listen(sock, backlog);
1478                if (!err)
1479                        err = sock->ops->listen(sock, backlog);
1480
1481                fput_light(sock->file, fput_needed);
1482        }
1483        return err;
1484}
1485
1486/*
1487 *      For accept, we attempt to create a new socket, set up the link
1488 *      with the client, wake up the client, then return the new
1489 *      connected fd. We collect the address of the connector in kernel
1490 *      space and move it to user at the very end. This is unclean because
1491 *      we open the socket then return an error.
1492 *
1493 *      1003.1g adds the ability to recvmsg() to query connection pending
1494 *      status to recvmsg. We need to add that support in a way thats
1495 *      clean when we restucture accept also.
1496 */
1497
1498SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1499                int __user *, upeer_addrlen, int, flags)
1500{
1501        struct socket *sock, *newsock;
1502        struct file *newfile;
1503        int err, len, newfd, fput_needed;
1504        struct sockaddr_storage address;
1505
1506        if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1507                return -EINVAL;
1508
1509        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1510                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1511
1512        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1513        if (!sock)
1514                goto out;
1515
1516        err = -ENFILE;
1517        newsock = sock_alloc();
1518        if (!newsock)
1519                goto out_put;
1520
1521        newsock->type = sock->type;
1522        newsock->ops = sock->ops;
1523
1524        /*
1525         * We don't need try_module_get here, as the listening socket (sock)
1526         * has the protocol module (sock->ops->owner) held.
1527         */
1528        __module_get(newsock->ops->owner);
1529
1530        newfd = get_unused_fd_flags(flags);
1531        if (unlikely(newfd < 0)) {
1532                err = newfd;
1533                sock_release(newsock);
1534                goto out_put;
1535        }
1536        newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
1537        if (unlikely(IS_ERR(newfile))) {
1538                err = PTR_ERR(newfile);
1539                put_unused_fd(newfd);
1540                sock_release(newsock);
1541                goto out_put;
1542        }
1543
1544        err = security_socket_accept(sock, newsock);
1545        if (err)
1546                goto out_fd;
1547
1548        err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1549        if (err < 0)
1550                goto out_fd;
1551
1552        if (upeer_sockaddr) {
1553                if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
1554                                          &len, 2) < 0) {
1555                        err = -ECONNABORTED;
1556                        goto out_fd;
1557                }
1558                err = move_addr_to_user(&address,
1559                                        len, upeer_sockaddr, upeer_addrlen);
1560                if (err < 0)
1561                        goto out_fd;
1562        }
1563
1564        /* File flags are not inherited via accept() unlike another OSes. */
1565
1566        fd_install(newfd, newfile);
1567        err = newfd;
1568
1569out_put:
1570        fput_light(sock->file, fput_needed);
1571out:
1572        return err;
1573out_fd:
1574        fput(newfile);
1575        put_unused_fd(newfd);
1576        goto out_put;
1577}
1578
1579SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1580                int __user *, upeer_addrlen)
1581{
1582        return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
1583}
1584
1585/*
1586 *      Attempt to connect to a socket with the server address.  The address
1587 *      is in user space so we verify it is OK and move it to kernel space.
1588 *
1589 *      For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1590 *      break bindings
1591 *
1592 *      NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1593 *      other SEQPACKET protocols that take time to connect() as it doesn't
1594 *      include the -EINPROGRESS status for such sockets.
1595 */
1596
1597SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1598                int, addrlen)
1599{
1600        struct socket *sock;
1601        struct sockaddr_storage address;
1602        int err, fput_needed;
1603
1604        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1605        if (!sock)
1606                goto out;
1607        err = move_addr_to_kernel(uservaddr, addrlen, &address);
1608        if (err < 0)
1609                goto out_put;
1610
1611        err =
1612            security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1613        if (err)
1614                goto out_put;
1615
1616        err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1617                                 sock->file->f_flags);
1618out_put:
1619        fput_light(sock->file, fput_needed);
1620out:
1621        return err;
1622}
1623
1624/*
1625 *      Get the local address ('name') of a socket object. Move the obtained
1626 *      name to user space.
1627 */
1628
1629SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1630                int __user *, usockaddr_len)
1631{
1632        struct socket *sock;
1633        struct sockaddr_storage address;
1634        int len, err, fput_needed;
1635
1636        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1637        if (!sock)
1638                goto out;
1639
1640        err = security_socket_getsockname(sock);
1641        if (err)
1642                goto out_put;
1643
1644        err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1645        if (err)
1646                goto out_put;
1647        err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1648
1649out_put:
1650        fput_light(sock->file, fput_needed);
1651out:
1652        return err;
1653}
1654
1655/*
1656 *      Get the remote address ('name') of a socket object. Move the obtained
1657 *      name to user space.
1658 */
1659
1660SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1661                int __user *, usockaddr_len)
1662{
1663        struct socket *sock;
1664        struct sockaddr_storage address;
1665        int len, err, fput_needed;
1666
1667        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1668        if (sock != NULL) {
1669                err = security_socket_getpeername(sock);
1670                if (err) {
1671                        fput_light(sock->file, fput_needed);
1672                        return err;
1673                }
1674
1675                err =
1676                    sock->ops->getname(sock, (struct sockaddr *)&address, &len,
1677                                       1);
1678                if (!err)
1679                        err = move_addr_to_user(&address, len, usockaddr,
1680                                                usockaddr_len);
1681                fput_light(sock->file, fput_needed);
1682        }
1683        return err;
1684}
1685
1686/*
1687 *      Send a datagram to a given address. We move the address into kernel
1688 *      space and check the user space data area is readable before invoking
1689 *      the protocol.
1690 */
1691
1692SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1693                unsigned int, flags, struct sockaddr __user *, addr,
1694                int, addr_len)
1695{
1696        struct socket *sock;
1697        struct sockaddr_storage address;
1698        int err;
1699        struct msghdr msg;
1700        struct iovec iov;
1701        int fput_needed;
1702
1703        if (len > INT_MAX)
1704                len = INT_MAX;
1705        if (unlikely(!access_ok(VERIFY_READ, buff, len)))
1706                return -EFAULT;
1707        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1708        if (!sock)
1709                goto out;
1710
1711        iov.iov_base = buff;
1712        iov.iov_len = len;
1713        msg.msg_name = NULL;
1714        iov_iter_init(&msg.msg_iter, WRITE, &iov, 1, len);
1715        msg.msg_control = NULL;
1716        msg.msg_controllen = 0;
1717        msg.msg_namelen = 0;
1718        if (addr) {
1719                err = move_addr_to_kernel(addr, addr_len, &address);
1720                if (err < 0)
1721                        goto out_put;
1722                msg.msg_name = (struct sockaddr *)&address;
1723                msg.msg_namelen = addr_len;
1724        }
1725        if (sock->file->f_flags & O_NONBLOCK)
1726                flags |= MSG_DONTWAIT;
1727        msg.msg_flags = flags;
1728        err = sock_sendmsg(sock, &msg, len);
1729
1730out_put:
1731        fput_light(sock->file, fput_needed);
1732out:
1733        return err;
1734}
1735
1736/*
1737 *      Send a datagram down a socket.
1738 */
1739
1740SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
1741                unsigned int, flags)
1742{
1743        return sys_sendto(fd, buff, len, flags, NULL, 0);
1744}
1745
1746/*
1747 *      Receive a frame from the socket and optionally record the address of the
1748 *      sender. We verify the buffers are writable and if needed move the
1749 *      sender address from kernel to user space.
1750 */
1751
1752SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1753                unsigned int, flags, struct sockaddr __user *, addr,
1754                int __user *, addr_len)
1755{
1756        struct socket *sock;
1757        struct iovec iov;
1758        struct msghdr msg;
1759        struct sockaddr_storage address;
1760        int err, err2;
1761        int fput_needed;
1762
1763        if (size > INT_MAX)
1764                size = INT_MAX;
1765        if (unlikely(!access_ok(VERIFY_WRITE, ubuf, size)))
1766                return -EFAULT;
1767        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1768        if (!sock)
1769                goto out;
1770
1771        msg.msg_control = NULL;
1772        msg.msg_controllen = 0;
1773        iov.iov_len = size;
1774        iov.iov_base = ubuf;
1775        iov_iter_init(&msg.msg_iter, READ, &iov, 1, size);
1776        /* Save some cycles and don't copy the address if not needed */
1777        msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1778        /* We assume all kernel code knows the size of sockaddr_storage */
1779        msg.msg_namelen = 0;
1780        if (sock->file->f_flags & O_NONBLOCK)
1781                flags |= MSG_DONTWAIT;
1782        err = sock_recvmsg(sock, &msg, size, flags);
1783
1784        if (err >= 0 && addr != NULL) {
1785                err2 = move_addr_to_user(&address,
1786                                         msg.msg_namelen, addr, addr_len);
1787                if (err2 < 0)
1788                        err = err2;
1789        }
1790
1791        fput_light(sock->file, fput_needed);
1792out:
1793        return err;
1794}
1795
1796/*
1797 *      Receive a datagram from a socket.
1798 */
1799
1800SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1801                unsigned int, flags)
1802{
1803        return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1804}
1805
1806/*
1807 *      Set a socket option. Because we don't know the option lengths we have
1808 *      to pass the user mode parameter for the protocols to sort out.
1809 */
1810
1811SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1812                char __user *, optval, int, optlen)
1813{
1814        int err, fput_needed;
1815        struct socket *sock;
1816
1817        if (optlen < 0)
1818                return -EINVAL;
1819
1820        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1821        if (sock != NULL) {
1822                err = security_socket_setsockopt(sock, level, optname);
1823                if (err)
1824                        goto out_put;
1825
1826                if (level == SOL_SOCKET)
1827                        err =
1828                            sock_setsockopt(sock, level, optname, optval,
1829                                            optlen);
1830                else
1831                        err =
1832                            sock->ops->setsockopt(sock, level, optname, optval,
1833                                                  optlen);
1834out_put:
1835                fput_light(sock->file, fput_needed);
1836        }
1837        return err;
1838}
1839
1840/*
1841 *      Get a socket option. Because we don't know the option lengths we have
1842 *      to pass a user mode parameter for the protocols to sort out.
1843 */
1844
1845SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1846                char __user *, optval, int __user *, optlen)
1847{
1848        int err, fput_needed;
1849        struct socket *sock;
1850
1851        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1852        if (sock != NULL) {
1853                err = security_socket_getsockopt(sock, level, optname);
1854                if (err)
1855                        goto out_put;
1856
1857                if (level == SOL_SOCKET)
1858                        err =
1859                            sock_getsockopt(sock, level, optname, optval,
1860                                            optlen);
1861                else
1862                        err =
1863                            sock->ops->getsockopt(sock, level, optname, optval,
1864                                                  optlen);
1865out_put:
1866                fput_light(sock->file, fput_needed);
1867        }
1868        return err;
1869}
1870
1871/*
1872 *      Shutdown a socket.
1873 */
1874
1875SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1876{
1877        int err, fput_needed;
1878        struct socket *sock;
1879
1880        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1881        if (sock != NULL) {
1882                err = security_socket_shutdown(sock, how);
1883                if (!err)
1884                        err = sock->ops->shutdown(sock, how);
1885                fput_light(sock->file, fput_needed);
1886        }
1887        return err;
1888}
1889
/*
 * Helper macros yielding the address of a message header field in either
 * the native or the compat layout. They are meant for the 32/64 bit fields
 * which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG() is not self-contained: besides "msg" it expands to a
 * reference to a variable named "flags" and to one named "<msg>_compat",
 * so both must be in scope wherever the macros are used.
 */
#define COMPAT_MSG(msg, member) \
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
1896
/*
 * Destination address remembered between the messages of one sendmmsg()
 * call, so that a repeated destination can skip the security check.
 */
struct used_address {
	/* raw bytes of the last address a send succeeded to */
	struct sockaddr_storage name;
	/* number of valid bytes in name; UINT_MAX until something is stored */
	unsigned int name_len;
};
1901
1902static ssize_t copy_msghdr_from_user(struct msghdr *kmsg,
1903                                     struct user_msghdr __user *umsg,
1904                                     struct sockaddr __user **save_addr,
1905                                     struct iovec **iov)
1906{
1907        struct sockaddr __user *uaddr;
1908        struct iovec __user *uiov;
1909        size_t nr_segs;
1910        ssize_t err;
1911
1912        if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1913            __get_user(uaddr, &umsg->msg_name) ||
1914            __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1915            __get_user(uiov, &umsg->msg_iov) ||
1916            __get_user(nr_segs, &umsg->msg_iovlen) ||
1917            __get_user(kmsg->msg_control, &umsg->msg_control) ||
1918            __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1919            __get_user(kmsg->msg_flags, &umsg->msg_flags))
1920                return -EFAULT;
1921
1922        if (!uaddr)
1923                kmsg->msg_namelen = 0;
1924
1925        if (kmsg->msg_namelen < 0)
1926                return -EINVAL;
1927
1928        if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
1929                kmsg->msg_namelen = sizeof(struct sockaddr_storage);
1930
1931        if (save_addr)
1932                *save_addr = uaddr;
1933
1934        if (uaddr && kmsg->msg_namelen) {
1935                if (!save_addr) {
1936                        err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1937                                                  kmsg->msg_name);
1938                        if (err < 0)
1939                                return err;
1940                }
1941        } else {
1942                kmsg->msg_name = NULL;
1943                kmsg->msg_namelen = 0;
1944        }
1945
1946        if (nr_segs > UIO_MAXIOV)
1947                return -EMSGSIZE;
1948
1949        err = rw_copy_check_uvector(save_addr ? READ : WRITE,
1950                                    uiov, nr_segs,
1951                                    UIO_FASTIOV, *iov, iov);
1952        if (err >= 0)
1953                iov_iter_init(&kmsg->msg_iter, save_addr ? READ : WRITE,
1954                              *iov, nr_segs, err);
1955        return err;
1956}
1957
1958static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
1959                         struct msghdr *msg_sys, unsigned int flags,
1960                         struct used_address *used_address)
1961{
1962        struct compat_msghdr __user *msg_compat =
1963            (struct compat_msghdr __user *)msg;
1964        struct sockaddr_storage address;
1965        struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1966        unsigned char ctl[sizeof(struct cmsghdr) + 20]
1967            __attribute__ ((aligned(sizeof(__kernel_size_t))));
1968        /* 20 is size of ipv6_pktinfo */
1969        unsigned char *ctl_buf = ctl;
1970        int ctl_len, total_len;
1971        ssize_t err;
1972
1973        msg_sys->msg_name = &address;
1974
1975        if (MSG_CMSG_COMPAT & flags)
1976                err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
1977        else
1978                err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
1979        if (err < 0)
1980                goto out_freeiov;
1981        total_len = err;
1982
1983        err = -ENOBUFS;
1984
1985        if (msg_sys->msg_controllen > INT_MAX)
1986                goto out_freeiov;
1987        ctl_len = msg_sys->msg_controllen;
1988        if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
1989                err =
1990                    cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
1991                                                     sizeof(ctl));
1992                if (err)
1993                        goto out_freeiov;
1994                ctl_buf = msg_sys->msg_control;
1995                ctl_len = msg_sys->msg_controllen;
1996        } else if (ctl_len) {
1997                if (ctl_len > sizeof(ctl)) {
1998                        ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
1999                        if (ctl_buf == NULL)
2000                                goto out_freeiov;
2001                }
2002                err = -EFAULT;
2003                /*
2004                 * Careful! Before this, msg_sys->msg_control contains a user pointer.
2005                 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2006                 * checking falls down on this.
2007                 */
2008                if (copy_from_user(ctl_buf,
2009                                   (void __user __force *)msg_sys->msg_control,
2010                                   ctl_len))
2011                        goto out_freectl;
2012                msg_sys->msg_control = ctl_buf;
2013        }
2014        msg_sys->msg_flags = flags;
2015
2016        if (sock->file->f_flags & O_NONBLOCK)
2017                msg_sys->msg_flags |= MSG_DONTWAIT;
2018        /*
2019         * If this is sendmmsg() and current destination address is same as
2020         * previously succeeded address, omit asking LSM's decision.
2021         * used_address->name_len is initialized to UINT_MAX so that the first
2022         * destination address never matches.
2023         */
2024        if (used_address && msg_sys->msg_name &&
2025            used_address->name_len == msg_sys->msg_namelen &&
2026            !memcmp(&used_address->name, msg_sys->msg_name,
2027                    used_address->name_len)) {
2028                err = sock_sendmsg_nosec(sock, msg_sys, total_len);
2029                goto out_freectl;
2030        }
2031        err = sock_sendmsg(sock, msg_sys, total_len);
2032        /*
2033         * If this is sendmmsg() and sending to current destination address was
2034         * successful, remember it.
2035         */
2036        if (used_address && err >= 0) {
2037                used_address->name_len = msg_sys->msg_namelen;
2038                if (msg_sys->msg_name)
2039                        memcpy(&used_address->name, msg_sys->msg_name,
2040                               used_address->name_len);
2041        }
2042
2043out_freectl:
2044        if (ctl_buf != ctl)
2045                sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2046out_freeiov:
2047        if (iov != iovstack)
2048                kfree(iov);
2049        return err;
2050}
2051
2052/*
2053 *      BSD sendmsg interface
2054 */
2055
2056long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
2057{
2058        int fput_needed, err;
2059        struct msghdr msg_sys;
2060        struct socket *sock;
2061
2062        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2063        if (!sock)
2064                goto out;
2065
2066        err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
2067
2068        fput_light(sock->file, fput_needed);
2069out:
2070        return err;
2071}
2072
2073SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
2074{
2075        if (flags & MSG_CMSG_COMPAT)
2076                return -EINVAL;
2077        return __sys_sendmsg(fd, msg, flags);
2078}
2079
2080/*
2081 *      Linux sendmmsg interface
2082 */
2083
2084int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2085                   unsigned int flags)
2086{
2087        int fput_needed, err, datagrams;
2088        struct socket *sock;
2089        struct mmsghdr __user *entry;
2090        struct compat_mmsghdr __user *compat_entry;
2091        struct msghdr msg_sys;
2092        struct used_address used_address;
2093
2094        if (vlen > UIO_MAXIOV)
2095                vlen = UIO_MAXIOV;
2096
2097        datagrams = 0;
2098
2099        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2100        if (!sock)
2101                return err;
2102
2103        used_address.name_len = UINT_MAX;
2104        entry = mmsg;
2105        compat_entry = (struct compat_mmsghdr __user *)mmsg;
2106        err = 0;
2107
2108        while (datagrams < vlen) {
2109                if (MSG_CMSG_COMPAT & flags) {
2110                        err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
2111                                             &msg_sys, flags, &used_address);
2112                        if (err < 0)
2113                                break;
2114                        err = __put_user(err, &compat_entry->msg_len);
2115                        ++compat_entry;
2116                } else {
2117                        err = ___sys_sendmsg(sock,
2118                                             (struct user_msghdr __user *)entry,
2119                                             &msg_sys, flags, &used_address);
2120                        if (err < 0)
2121                                break;
2122                        err = put_user(err, &entry->msg_len);
2123                        ++entry;
2124                }
2125
2126                if (err)
2127                        break;
2128                ++datagrams;
2129        }
2130
2131        fput_light(sock->file, fput_needed);
2132
2133        /* We only return an error if no datagrams were able to be sent */
2134        if (datagrams != 0)
2135                return datagrams;
2136
2137        return err;
2138}
2139
2140SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2141                unsigned int, vlen, unsigned int, flags)
2142{
2143        if (flags & MSG_CMSG_COMPAT)
2144                return -EINVAL;
2145        return __sys_sendmmsg(fd, mmsg, vlen, flags);
2146}
2147
2148static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2149                         struct msghdr *msg_sys, unsigned int flags, int nosec)
2150{
2151        struct compat_msghdr __user *msg_compat =
2152            (struct compat_msghdr __user *)msg;
2153        struct iovec iovstack[UIO_FASTIOV];
2154        struct iovec *iov = iovstack;
2155        unsigned long cmsg_ptr;
2156        int total_len, len;
2157        ssize_t err;
2158
2159        /* kernel mode address */
2160        struct sockaddr_storage addr;
2161
2162        /* user mode address pointers */
2163        struct sockaddr __user *uaddr;
2164        int __user *uaddr_len = COMPAT_NAMELEN(msg);
2165
2166        msg_sys->msg_name = &addr;
2167
2168        if (MSG_CMSG_COMPAT & flags)
2169                err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
2170        else
2171                err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
2172        if (err < 0)
2173                goto out_freeiov;
2174        total_len = err;
2175
2176        cmsg_ptr = (unsigned long)msg_sys->msg_control;
2177        msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
2178
2179        /* We assume all kernel code knows the size of sockaddr_storage */
2180        msg_sys->msg_namelen = 0;
2181
2182        if (sock->file->f_flags & O_NONBLOCK)
2183                flags |= MSG_DONTWAIT;
2184        err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2185                                                          total_len, flags);
2186        if (err < 0)
2187                goto out_freeiov;
2188        len = err;
2189
2190        if (uaddr != NULL) {
2191                err = move_addr_to_user(&addr,
2192                                        msg_sys->msg_namelen, uaddr,
2193                                        uaddr_len);
2194                if (err < 0)
2195                        goto out_freeiov;
2196        }
2197        err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
2198                         COMPAT_FLAGS(msg));
2199        if (err)
2200                goto out_freeiov;
2201        if (MSG_CMSG_COMPAT & flags)
2202                err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
2203                                 &msg_compat->msg_controllen);
2204        else
2205                err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
2206                                 &msg->msg_controllen);
2207        if (err)
2208                goto out_freeiov;
2209        err = len;
2210
2211out_freeiov:
2212        if (iov != iovstack)
2213                kfree(iov);
2214        return err;
2215}
2216
2217/*
2218 *      BSD recvmsg interface
2219 */
2220
2221long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
2222{
2223        int fput_needed, err;
2224        struct msghdr msg_sys;
2225        struct socket *sock;
2226
2227        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2228        if (!sock)
2229                goto out;
2230
2231        err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2232
2233        fput_light(sock->file, fput_needed);
2234out:
2235        return err;
2236}
2237
2238SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
2239                unsigned int, flags)
2240{
2241        if (flags & MSG_CMSG_COMPAT)
2242                return -EINVAL;
2243        return __sys_recvmsg(fd, msg, flags);
2244}
2245
2246/*
2247 *     Linux recvmmsg interface
2248 */
2249
2250int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2251                   unsigned int flags, struct timespec *timeout)
2252{
2253        int fput_needed, err, datagrams;
2254        struct socket *sock;
2255        struct mmsghdr __user *entry;
2256        struct compat_mmsghdr __user *compat_entry;
2257        struct msghdr msg_sys;
2258        struct timespec end_time;
2259
2260        if (timeout &&
2261            poll_select_set_timeout(&end_time, timeout->tv_sec,
2262                                    timeout->tv_nsec))
2263                return -EINVAL;
2264
2265        datagrams = 0;
2266
2267        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2268        if (!sock)
2269                return err;
2270
2271        err = sock_error(sock->sk);
2272        if (err)
2273                goto out_put;
2274
2275        entry = mmsg;
2276        compat_entry = (struct compat_mmsghdr __user *)mmsg;
2277
2278        while (datagrams < vlen) {
2279                /*
2280                 * No need to ask LSM for more than the first datagram.
2281                 */
2282                if (MSG_CMSG_COMPAT & flags) {
2283                        err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
2284                                             &msg_sys, flags & ~MSG_WAITFORONE,
2285                                             datagrams);
2286                        if (err < 0)
2287                                break;
2288                        err = __put_user(err, &compat_entry->msg_len);
2289                        ++compat_entry;
2290                } else {
2291                        err = ___sys_recvmsg(sock,
2292                                             (struct user_msghdr __user *)entry,
2293                                             &msg_sys, flags & ~MSG_WAITFORONE,
2294                                             datagrams);
2295                        if (err < 0)
2296                                break;
2297                        err = put_user(err, &entry->msg_len);
2298                        ++entry;
2299                }
2300
2301                if (err)
2302                        break;
2303                ++datagrams;
2304
2305                /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2306                if (flags & MSG_WAITFORONE)
2307                        flags |= MSG_DONTWAIT;
2308
2309                if (timeout) {
2310                        ktime_get_ts(timeout);
2311                        *timeout = timespec_sub(end_time, *timeout);
2312                        if (timeout->tv_sec < 0) {
2313                                timeout->tv_sec = timeout->tv_nsec = 0;
2314                                break;
2315                        }
2316
2317                        /* Timeout, return less than vlen datagrams */
2318                        if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2319                                break;
2320                }
2321
2322                /* Out of band data, return right away */
2323                if (msg_sys.msg_flags & MSG_OOB)
2324                        break;
2325        }
2326
2327out_put:
2328        fput_light(sock->file, fput_needed);
2329
2330        if (err == 0)
2331                return datagrams;
2332
2333        if (datagrams != 0) {
2334                /*
2335                 * We may return less entries than requested (vlen) if the
2336                 * sock is non block and there aren't enough datagrams...
2337                 */
2338                if (err != -EAGAIN) {
2339                        /*
2340                         * ... or  if recvmsg returns an error after we
2341                         * received some datagrams, where we record the
2342                         * error to return on the next call or if the
2343                         * app asks about it using getsockopt(SO_ERROR).
2344                         */
2345                        sock->sk->sk_err = -err;
2346                }
2347
2348                return datagrams;
2349        }
2350
2351        return err;
2352}
2353
2354SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2355                unsigned int, vlen, unsigned int, flags,
2356                struct timespec __user *, timeout)
2357{
2358        int datagrams;
2359        struct timespec timeout_sys;
2360
2361        if (flags & MSG_CMSG_COMPAT)
2362                return -EINVAL;
2363
2364        if (!timeout)
2365                return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2366
2367        if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2368                return -EFAULT;
2369
2370        datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2371
2372        if (datagrams > 0 &&
2373            copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2374                datagrams = -EFAULT;
2375
2376        return datagrams;
2377}
2378
2379#ifdef __ARCH_WANT_SYS_SOCKETCALL
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by the call
 * number: AL(n) is the size of n unsigned long arguments.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),	/*  0 */
	AL(3),	/*  1 */
	AL(3),	/*  2 */
	AL(3),	/*  3 */
	AL(2),	/*  4 */
	AL(3),	/*  5 */
	AL(3),	/*  6 */
	AL(3),	/*  7 */
	AL(4),	/*  8 */
	AL(4),	/*  9 */
	AL(4),	/* 10 */
	AL(6),	/* 11 */
	AL(6),	/* 12 */
	AL(2),	/* 13 */
	AL(5),	/* 14 */
	AL(5),	/* 15 */
	AL(3),	/* 16 */
	AL(3),	/* 17 */
	AL(4),	/* 18 */
	AL(5),	/* 19 */
	AL(4)	/* 20 */
};

#undef AL
2390
2391/*
2392 *      System call vectors.
2393 *
2394 *      Argument checking cleaned up. Saved 20% in size.
2395 *  This function doesn't need to set the kernel lock because
2396 *  it is set by the callees.
2397 */
2398
2399SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
2400{
2401        unsigned long a[AUDITSC_ARGS];
2402        unsigned long a0, a1;
2403        int err;
2404        unsigned int len;
2405
2406        if (call < 1 || call > SYS_SENDMMSG)
2407                return -EINVAL;
2408
2409        len = nargs[call];
2410        if (len > sizeof(a))
2411                return -EINVAL;
2412
2413        /* copy_from_user should be SMP safe. */
2414        if (copy_from_user(a, args, len))
2415                return -EFAULT;
2416
2417        err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2418        if (err)
2419                return err;
2420
2421        a0 = a[0];
2422        a1 = a[1];
2423
2424        switch (call) {
2425        case SYS_SOCKET:
2426                err = sys_socket(a0, a1, a[2]);
2427                break;
2428        case SYS_BIND:
2429                err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2430                break;
2431        case SYS_CONNECT:
2432                err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2433                break;
2434        case SYS_LISTEN:
2435                err = sys_listen(a0, a1);
2436                break;
2437        case SYS_ACCEPT:
2438                err = sys_accept4(a0, (struct sockaddr __user *)a1,
2439                                  (int __user *)a[2], 0);
2440                break;
2441        case SYS_GETSOCKNAME:
2442                err =
2443                    sys_getsockname(a0, (struct sockaddr __user *)a1,
2444                                    (int __user *)a[2]);
2445                break;
2446        case SYS_GETPEERNAME:
2447                err =
2448                    sys_getpeername(a0, (struct sockaddr __user *)a1,
2449                                    (int __user *)a[2]);
2450                break;
2451        case SYS_SOCKETPAIR:
2452                err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2453                break;
2454        case SYS_SEND:
2455                err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2456                break;
2457        case SYS_SENDTO:
2458                err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2459                                 (struct sockaddr __user *)a[4], a[5]);
2460                break;
2461        case SYS_RECV:
2462                err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2463                break;
2464        case SYS_RECVFROM:
2465                err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2466                                   (struct sockaddr __user *)a[4],
2467                                   (int __user *)a[5]);
2468                break;
2469        case SYS_SHUTDOWN:
2470                err = sys_shutdown(a0, a1);
2471                break;
2472        case SYS_SETSOCKOPT:
2473                err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2474                break;
2475        case SYS_GETSOCKOPT:
2476                err =
2477                    sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2478                                   (int __user *)a[4]);
2479                break;
2480        case SYS_SENDMSG:
2481                err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
2482                break;
2483        case SYS_SENDMMSG:
2484                err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2485                break;
2486        case SYS_RECVMSG:
2487                err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
2488                break;
2489        case SYS_RECVMMSG:
2490                err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2491                                   (struct timespec __user *)a[4]);
2492                break;
2493        case SYS_ACCEPT4:
2494                err = sys_accept4(a0, (struct sockaddr __user *)a1,
2495                                  (int __user *)a[2], a[3]);
2496                break;
2497        default:
2498                err = -EINVAL;
2499                break;
2500        }
2501        return err;
2502}
2503
2504#endif                          /* __ARCH_WANT_SYS_SOCKETCALL */
2505
2506/**
2507 *      sock_register - add a socket protocol handler
2508 *      @ops: description of protocol
2509 *
2510 *      This function is called by a protocol handler that wants to
2511 *      advertise its address family, and have it linked into the
2512 *      socket interface. The value ops->family corresponds to the
2513 *      socket system call protocol family.
2514 */
2515int sock_register(const struct net_proto_family *ops)
2516{
2517        int err;
2518
2519        if (ops->family >= NPROTO) {
2520                pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
2521                return -ENOBUFS;
2522        }
2523
2524        spin_lock(&net_family_lock);
2525        if (rcu_dereference_protected(net_families[ops->family],
2526                                      lockdep_is_held(&net_family_lock)))
2527                err = -EEXIST;
2528        else {
2529                rcu_assign_pointer(net_families[ops->family], ops);
2530                err = 0;
2531        }
2532        spin_unlock(&net_family_lock);
2533
2534        pr_info("NET: Registered protocol family %d\n", ops->family);
2535        return err;
2536}
2537EXPORT_SYMBOL(sock_register);
2538
2539/**
2540 *      sock_unregister - remove a protocol handler
2541 *      @family: protocol family to remove
2542 *
2543 *      This function is called by a protocol handler that wants to
2544 *      remove its address family, and have it unlinked from the
2545 *      new socket creation.
2546 *
2547 *      If protocol handler is a module, then it can use module reference
2548 *      counts to protect against new references. If protocol handler is not
2549 *      a module then it needs to provide its own protection in
2550 *      the ops->create routine.
2551 */
2552void sock_unregister(int family)
2553{
2554        BUG_ON(family < 0 || family >= NPROTO);
2555
2556        spin_lock(&net_family_lock);
2557        RCU_INIT_POINTER(net_families[family], NULL);
2558        spin_unlock(&net_family_lock);
2559
2560        synchronize_rcu();
2561
2562        pr_info("NET: Unregistered protocol family %d\n", family);
2563}
2564EXPORT_SYMBOL(sock_unregister);
2565
2566static int __init sock_init(void)
2567{
2568        int err;
2569        /*
2570         *      Initialize the network sysctl infrastructure.
2571         */
2572        err = net_sysctl_init();
2573        if (err)
2574                goto out;
2575
2576        /*
2577         *      Initialize skbuff SLAB cache
2578         */
2579        skb_init();
2580
2581        /*
2582         *      Initialize the protocols module.
2583         */
2584
2585        init_inodecache();
2586
2587        err = register_filesystem(&sock_fs_type);
2588        if (err)
2589                goto out_fs;
2590        sock_mnt = kern_mount(&sock_fs_type);
2591        if (IS_ERR(sock_mnt)) {
2592                err = PTR_ERR(sock_mnt);
2593                goto out_mount;
2594        }
2595
2596        /* The real protocol initialization is performed in later initcalls.
2597         */
2598
2599#ifdef CONFIG_NETFILTER
2600        err = netfilter_init();
2601        if (err)
2602                goto out;
2603#endif
2604
2605        ptp_classifier_init();
2606
2607out:
2608        return err;
2609
2610out_mount:
2611        unregister_filesystem(&sock_fs_type);
2612out_fs:
2613        goto out;
2614}
2615
2616core_initcall(sock_init);       /* early initcall */
2617
#ifdef CONFIG_PROC_FS
/*
 * Print the "sockets: used" line: the sum of the per-cpu sockets_in_use
 * counters over all possible CPUs, reported as 0 if the sum is negative.
 */
void socket_seq_show(struct seq_file *seq)
{
	int in_use = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		in_use += per_cpu(sockets_in_use, cpu);
	}

	/* It can be negative, by the way. 8) */
	seq_printf(seq, "sockets: used %d\n", in_use < 0 ? 0 : in_use);
}
#endif				/* CONFIG_PROC_FS */
2634
2635#ifdef CONFIG_COMPAT
2636static int do_siocgstamp(struct net *net, struct socket *sock,
2637                         unsigned int cmd, void __user *up)
2638{
2639        mm_segment_t old_fs = get_fs();
2640        struct timeval ktv;
2641        int err;
2642
2643        set_fs(KERNEL_DS);
2644        err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
2645        set_fs(old_fs);
2646        if (!err)
2647                err = compat_put_timeval(&ktv, up);
2648
2649        return err;
2650}
2651
2652static int do_siocgstampns(struct net *net, struct socket *sock,
2653                           unsigned int cmd, void __user *up)
2654{
2655        mm_segment_t old_fs = get_fs();
2656        struct timespec kts;
2657        int err;
2658
2659        set_fs(KERNEL_DS);
2660        err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
2661        set_fs(old_fs);
2662        if (!err)
2663                err = compat_put_timespec(&kts, up);
2664
2665        return err;
2666}
2667
2668static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
2669{
2670        struct ifreq __user *uifr;
2671        int err;
2672
2673        uifr = compat_alloc_user_space(sizeof(struct ifreq));
2674        if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2675                return -EFAULT;
2676
2677        err = dev_ioctl(net, SIOCGIFNAME, uifr);
2678        if (err)
2679                return err;
2680
2681        if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
2682                return -EFAULT;
2683
2684        return 0;
2685}
2686
2687static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
2688{
2689        struct compat_ifconf ifc32;
2690        struct ifconf ifc;
2691        struct ifconf __user *uifc;
2692        struct compat_ifreq __user *ifr32;
2693        struct ifreq __user *ifr;
2694        unsigned int i, j;
2695        int err;
2696
2697        if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
2698                return -EFAULT;
2699
2700        memset(&ifc, 0, sizeof(ifc));
2701        if (ifc32.ifcbuf == 0) {
2702                ifc32.ifc_len = 0;
2703                ifc.ifc_len = 0;
2704                ifc.ifc_req = NULL;
2705                uifc = compat_alloc_user_space(sizeof(struct ifconf));
2706        } else {
2707                size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2708                        sizeof(struct ifreq);
2709                uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2710                ifc.ifc_len = len;
2711                ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2712                ifr32 = compat_ptr(ifc32.ifcbuf);
2713                for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
2714                        if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
2715                                return -EFAULT;
2716                        ifr++;
2717                        ifr32++;
2718                }
2719        }
2720        if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2721                return -EFAULT;
2722
2723        err = dev_ioctl(net, SIOCGIFCONF, uifc);
2724        if (err)
2725                return err;
2726
2727        if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2728                return -EFAULT;
2729
2730        ifr = ifc.ifc_req;
2731        ifr32 = compat_ptr(ifc32.ifcbuf);
2732        for (i = 0, j = 0;
2733             i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2734             i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2735                if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
2736                        return -EFAULT;
2737                ifr32++;
2738                ifr++;
2739        }
2740
2741        if (ifc32.ifcbuf == 0) {
2742                /* Translate from 64-bit structure multiple to
2743                 * a 32-bit one.
2744                 */
2745                i = ifc.ifc_len;
2746                i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
2747                ifc32.ifc_len = i;
2748        } else {
2749                ifc32.ifc_len = i;
2750        }
2751        if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
2752                return -EFAULT;
2753
2754        return 0;
2755}
2756
2757static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
2758{
2759        struct compat_ethtool_rxnfc __user *compat_rxnfc;
2760        bool convert_in = false, convert_out = false;
2761        size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2762        struct ethtool_rxnfc __user *rxnfc;
2763        struct ifreq __user *ifr;
2764        u32 rule_cnt = 0, actual_rule_cnt;
2765        u32 ethcmd;
2766        u32 data;
2767        int ret;
2768
2769        if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2770                return -EFAULT;
2771
2772        compat_rxnfc = compat_ptr(data);
2773
2774        if (get_user(ethcmd, &compat_rxnfc->cmd))
2775                return -EFAULT;
2776
2777        /* Most ethtool structures are defined without padding.
2778         * Unfortunately struct ethtool_rxnfc is an exception.
2779         */
2780        switch (ethcmd) {
2781        default:
2782                break;
2783        case ETHTOOL_GRXCLSRLALL:
2784                /* Buffer size is variable */
2785                if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2786                        return -EFAULT;
2787                if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2788                        return -ENOMEM;
2789                buf_size += rule_cnt * sizeof(u32);
2790                /* fall through */
2791        case ETHTOOL_GRXRINGS:
2792        case ETHTOOL_GRXCLSRLCNT:
2793        case ETHTOOL_GRXCLSRULE:
2794        case ETHTOOL_SRXCLSRLINS:
2795                convert_out = true;
2796                /* fall through */
2797        case ETHTOOL_SRXCLSRLDEL:
2798                buf_size += sizeof(struct ethtool_rxnfc);
2799                convert_in = true;
2800                break;
2801        }
2802
2803        ifr = compat_alloc_user_space(buf_size);
2804        rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
2805
2806        if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2807                return -EFAULT;
2808
2809        if (put_user(convert_in ? rxnfc : compat_ptr(data),
2810                     &ifr->ifr_ifru.ifru_data))
2811                return -EFAULT;
2812
2813        if (convert_in) {
2814                /* We expect there to be holes between fs.m_ext and
2815                 * fs.ring_cookie and at the end of fs, but nowhere else.
2816                 */
2817                BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2818                             sizeof(compat_rxnfc->fs.m_ext) !=
2819                             offsetof(struct ethtool_rxnfc, fs.m_ext) +
2820                             sizeof(rxnfc->fs.m_ext));
2821                BUILD_BUG_ON(
2822                        offsetof(struct compat_ethtool_rxnfc, fs.location) -
2823                        offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2824                        offsetof(struct ethtool_rxnfc, fs.location) -
2825                        offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2826
2827                if (copy_in_user(rxnfc, compat_rxnfc,
2828                                 (void __user *)(&rxnfc->fs.m_ext + 1) -
2829                                 (void __user *)rxnfc) ||
2830                    copy_in_user(&rxnfc->fs.ring_cookie,
2831                                 &compat_rxnfc->fs.ring_cookie,
2832                                 (void __user *)(&rxnfc->fs.location + 1) -
2833                                 (void __user *)&rxnfc->fs.ring_cookie) ||
2834                    copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2835                                 sizeof(rxnfc->rule_cnt)))
2836                        return -EFAULT;
2837        }
2838
2839        ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2840        if (ret)
2841                return ret;
2842
2843        if (convert_out) {
2844                if (copy_in_user(compat_rxnfc, rxnfc,
2845                                 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2846                                 (const void __user *)rxnfc) ||
2847                    copy_in_user(&compat_rxnfc->fs.ring_cookie,
2848                                 &rxnfc->fs.ring_cookie,
2849                                 (const void __user *)(&rxnfc->fs.location + 1) -
2850                                 (const void __user *)&rxnfc->fs.ring_cookie) ||
2851                    copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2852                                 sizeof(rxnfc->rule_cnt)))
2853                        return -EFAULT;
2854
2855                if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2856                        /* As an optimisation, we only copy the actual
2857                         * number of rules that the underlying
2858                         * function returned.  Since Mallory might
2859                         * change the rule count in user memory, we
2860                         * check that it is less than the rule count
2861                         * originally given (as the user buffer size),
2862                         * which has been range-checked.
2863                         */
2864                        if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2865                                return -EFAULT;
2866                        if (actual_rule_cnt < rule_cnt)
2867                                rule_cnt = actual_rule_cnt;
2868                        if (copy_in_user(&compat_rxnfc->rule_locs[0],
2869                                         &rxnfc->rule_locs[0],
2870                                         rule_cnt * sizeof(u32)))
2871                                return -EFAULT;
2872                }
2873        }
2874
2875        return 0;
2876}
2877
2878static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2879{
2880        void __user *uptr;
2881        compat_uptr_t uptr32;
2882        struct ifreq __user *uifr;
2883
2884        uifr = compat_alloc_user_space(sizeof(*uifr));
2885        if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2886                return -EFAULT;
2887
2888        if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2889                return -EFAULT;
2890
2891        uptr = compat_ptr(uptr32);
2892
2893        if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2894                return -EFAULT;
2895
2896        return dev_ioctl(net, SIOCWANDEV, uifr);
2897}
2898
2899static int bond_ioctl(struct net *net, unsigned int cmd,
2900                         struct compat_ifreq __user *ifr32)
2901{
2902        struct ifreq kifr;
2903        mm_segment_t old_fs;
2904        int err;
2905
2906        switch (cmd) {
2907        case SIOCBONDENSLAVE:
2908        case SIOCBONDRELEASE:
2909        case SIOCBONDSETHWADDR:
2910        case SIOCBONDCHANGEACTIVE:
2911                if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
2912                        return -EFAULT;
2913
2914                old_fs = get_fs();
2915                set_fs(KERNEL_DS);
2916                err = dev_ioctl(net, cmd,
2917                                (struct ifreq __user __force *) &kifr);
2918                set_fs(old_fs);
2919
2920                return err;
2921        default:
2922                return -ENOIOCTLCMD;
2923        }
2924}
2925
2926/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2927static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
2928                                 struct compat_ifreq __user *u_ifreq32)
2929{
2930        struct ifreq __user *u_ifreq64;
2931        char tmp_buf[IFNAMSIZ];
2932        void __user *data64;
2933        u32 data32;
2934
2935        if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2936                           IFNAMSIZ))
2937                return -EFAULT;
2938        if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
2939                return -EFAULT;
2940        data64 = compat_ptr(data32);
2941
2942        u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2943
2944        if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2945                         IFNAMSIZ))
2946                return -EFAULT;
2947        if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
2948                return -EFAULT;
2949
2950        return dev_ioctl(net, cmd, u_ifreq64);
2951}
2952
2953static int dev_ifsioc(struct net *net, struct socket *sock,
2954                         unsigned int cmd, struct compat_ifreq __user *uifr32)
2955{
2956        struct ifreq __user *uifr;
2957        int err;
2958
2959        uifr = compat_alloc_user_space(sizeof(*uifr));
2960        if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2961                return -EFAULT;
2962
2963        err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2964
2965        if (!err) {
2966                switch (cmd) {
2967                case SIOCGIFFLAGS:
2968                case SIOCGIFMETRIC:
2969                case SIOCGIFMTU:
2970                case SIOCGIFMEM:
2971                case SIOCGIFHWADDR:
2972                case SIOCGIFINDEX:
2973                case SIOCGIFADDR:
2974                case SIOCGIFBRDADDR:
2975                case SIOCGIFDSTADDR:
2976                case SIOCGIFNETMASK:
2977                case SIOCGIFPFLAGS:
2978                case SIOCGIFTXQLEN:
2979                case SIOCGMIIPHY:
2980                case SIOCGMIIREG:
2981                        if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
2982                                err = -EFAULT;
2983                        break;
2984                }
2985        }
2986        return err;
2987}
2988
2989static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2990                        struct compat_ifreq __user *uifr32)
2991{
2992        struct ifreq ifr;
2993        struct compat_ifmap __user *uifmap32;
2994        mm_segment_t old_fs;
2995        int err;
2996
2997        uifmap32 = &uifr32->ifr_ifru.ifru_map;
2998        err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
2999        err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3000        err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3001        err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3002        err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3003        err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3004        err |= get_user(ifr.ifr_map.port, &uifmap32->port);
3005        if (err)
3006                return -EFAULT;
3007
3008        old_fs = get_fs();
3009        set_fs(KERNEL_DS);
3010        err = dev_ioctl(net, cmd, (void  __user __force *)&ifr);
3011        set_fs(old_fs);
3012
3013        if (cmd == SIOCGIFMAP && !err) {
3014                err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3015                err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3016                err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3017                err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3018                err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3019                err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3020                err |= put_user(ifr.ifr_map.port, &uifmap32->port);
3021                if (err)
3022                        err = -EFAULT;
3023        }
3024        return err;
3025}
3026
3027struct rtentry32 {
3028        u32             rt_pad1;
3029        struct sockaddr rt_dst;         /* target address               */
3030        struct sockaddr rt_gateway;     /* gateway addr (RTF_GATEWAY)   */
3031        struct sockaddr rt_genmask;     /* target network mask (IP)     */
3032        unsigned short  rt_flags;
3033        short           rt_pad2;
3034        u32             rt_pad3;
3035        unsigned char   rt_tos;
3036        unsigned char   rt_class;
3037        short           rt_pad4;
3038        short           rt_metric;      /* +1 for binary compatibility! */
3039        /* char * */ u32 rt_dev;        /* forcing the device at add    */
3040        u32             rt_mtu;         /* per route MTU/Window         */
3041        u32             rt_window;      /* Window clamping              */
3042        unsigned short  rt_irtt;        /* Initial RTT                  */
3043};
3044
3045struct in6_rtmsg32 {
3046        struct in6_addr         rtmsg_dst;
3047        struct in6_addr         rtmsg_src;
3048        struct in6_addr         rtmsg_gateway;
3049        u32                     rtmsg_type;
3050        u16                     rtmsg_dst_len;
3051        u16                     rtmsg_src_len;
3052        u32                     rtmsg_metric;
3053        u32                     rtmsg_info;
3054        u32                     rtmsg_flags;
3055        s32                     rtmsg_ifindex;
3056};
3057
3058static int routing_ioctl(struct net *net, struct socket *sock,
3059                         unsigned int cmd, void __user *argp)
3060{
3061        int ret;
3062        void *r = NULL;
3063        struct in6_rtmsg r6;
3064        struct rtentry r4;
3065        char devname[16];
3066        u32 rtdev;
3067        mm_segment_t old_fs = get_fs();
3068
3069        if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3070                struct in6_rtmsg32 __user *ur6 = argp;
3071                ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
3072                        3 * sizeof(struct in6_addr));
3073                ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3074                ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3075                ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3076                ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3077                ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3078                ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3079                ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
3080
3081                r = (void *) &r6;
3082        } else { /* ipv4 */
3083                struct rtentry32 __user *ur4 = argp;
3084                ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
3085                                        3 * sizeof(struct sockaddr));
3086                ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3087                ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3088                ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3089                ret |= get_user(r4.rt_window, &(ur4->rt_window));
3090                ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3091                ret |= get_user(rtdev, &(ur4->rt_dev));
3092                if (rtdev) {
3093                        ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
3094                        r4.rt_dev = (char __user __force *)devname;
3095                        devname[15] = 0;
3096                } else
3097                        r4.rt_dev = NULL;
3098
3099                r = (void *) &r4;
3100        }
3101
3102        if (ret) {
3103                ret = -EFAULT;
3104                goto out;
3105        }
3106
3107        set_fs(KERNEL_DS);
3108        ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
3109        set_fs(old_fs);
3110
3111out:
3112        return ret;
3113}
3114
3115/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3116 * for some operations; this forces use of the newer bridge-utils that
3117 * use compatible ioctls
3118 */
3119static int old_bridge_ioctl(compat_ulong_t __user *argp)
3120{
3121        compat_ulong_t tmp;
3122
3123        if (get_user(tmp, argp))
3124                return -EFAULT;
3125        if (tmp == BRCTL_GET_VERSION)
3126                return BRCTL_VERSION + 1;
3127        return -EINVAL;
3128}
3129
3130static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3131                         unsigned int cmd, unsigned long arg)
3132{
3133        void __user *argp = compat_ptr(arg);
3134        struct sock *sk = sock->sk;
3135        struct net *net = sock_net(sk);
3136
3137        if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
3138                return compat_ifr_data_ioctl(net, cmd, argp);
3139
3140        switch (cmd) {
3141        case SIOCSIFBR:
3142        case SIOCGIFBR:
3143                return old_bridge_ioctl(argp);
3144        case SIOCGIFNAME:
3145                return dev_ifname32(net, argp);
3146        case SIOCGIFCONF:
3147                return dev_ifconf(net, argp);
3148        case SIOCETHTOOL:
3149                return ethtool_ioctl(net, argp);
3150        case SIOCWANDEV:
3151                return compat_siocwandev(net, argp);
3152        case SIOCGIFMAP:
3153        case SIOCSIFMAP:
3154                return compat_sioc_ifmap(net, cmd, argp);
3155        case SIOCBONDENSLAVE:
3156        case SIOCBONDRELEASE:
3157        case SIOCBONDSETHWADDR:
3158        case SIOCBONDCHANGEACTIVE:
3159                return bond_ioctl(net, cmd, argp);
3160        case SIOCADDRT:
3161        case SIOCDELRT:
3162                return routing_ioctl(net, sock, cmd, argp);
3163        case SIOCGSTAMP:
3164                return do_siocgstamp(net, sock, cmd, argp);
3165        case SIOCGSTAMPNS:
3166                return do_siocgstampns(net, sock, cmd, argp);
3167        case SIOCBONDSLAVEINFOQUERY:
3168        case SIOCBONDINFOQUERY:
3169        case SIOCSHWTSTAMP:
3170        case SIOCGHWTSTAMP:
3171                return compat_ifr_data_ioctl(net, cmd, argp);
3172
3173        case FIOSETOWN:
3174        case SIOCSPGRP:
3175        case FIOGETOWN:
3176        case SIOCGPGRP:
3177        case SIOCBRADDBR:
3178        case SIOCBRDELBR:
3179        case SIOCGIFVLAN:
3180        case SIOCSIFVLAN:
3181        case SIOCADDDLCI:
3182        case SIOCDELDLCI:
3183                return sock_ioctl(file, cmd, arg);
3184
3185        case SIOCGIFFLAGS:
3186        case SIOCSIFFLAGS:
3187        case SIOCGIFMETRIC:
3188        case SIOCSIFMETRIC:
3189        case SIOCGIFMTU:
3190        case SIOCSIFMTU:
3191        case SIOCGIFMEM:
3192        case SIOCSIFMEM:
3193        case SIOCGIFHWADDR:
3194        case SIOCSIFHWADDR:
3195        case SIOCADDMULTI:
3196        case SIOCDELMULTI:
3197        case SIOCGIFINDEX:
3198        case SIOCGIFADDR:
3199        case SIOCSIFADDR:
3200        case SIOCSIFHWBROADCAST:
3201        case SIOCDIFADDR:
3202        case SIOCGIFBRDADDR:
3203        case SIOCSIFBRDADDR:
3204        case SIOCGIFDSTADDR:
3205        case SIOCSIFDSTADDR:
3206        case SIOCGIFNETMASK:
3207        case SIOCSIFNETMASK:
3208        case SIOCSIFPFLAGS:
3209        case SIOCGIFPFLAGS:
3210        case SIOCGIFTXQLEN:
3211        case SIOCSIFTXQLEN:
3212        case SIOCBRADDIF:
3213        case SIOCBRDELIF:
3214        case SIOCSIFNAME:
3215        case SIOCGMIIPHY:
3216        case SIOCGMIIREG:
3217        case SIOCSMIIREG:
3218                return dev_ifsioc(net, sock, cmd, argp);
3219
3220        case SIOCSARP:
3221        case SIOCGARP:
3222        case SIOCDARP:
3223        case SIOCATMARK:
3224                return sock_do_ioctl(net, sock, cmd, arg);
3225        }
3226
3227        return -ENOIOCTLCMD;
3228}
3229
3230static long compat_sock_ioctl(struct file *file, unsigned int cmd,
3231                              unsigned long arg)
3232{
3233        struct socket *sock = file->private_data;
3234        int ret = -ENOIOCTLCMD;
3235        struct sock *sk;
3236        struct net *net;
3237
3238        sk = sock->sk;
3239        net = sock_net(sk);
3240
3241        if (sock->ops->compat_ioctl)
3242                ret = sock->ops->compat_ioctl(sock, cmd, arg);
3243
3244        if (ret == -ENOIOCTLCMD &&
3245            (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3246                ret = compat_wext_handle_ioctl(net, cmd, arg);
3247
3248        if (ret == -ENOIOCTLCMD)
3249                ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3250
3251        return ret;
3252}
3253#endif
3254
3255int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3256{
3257        return sock->ops->bind(sock, addr, addrlen);
3258}
3259EXPORT_SYMBOL(kernel_bind);
3260
3261int kernel_listen(struct socket *sock, int backlog)
3262{
3263        return sock->ops->listen(sock, backlog);
3264}
3265EXPORT_SYMBOL(kernel_listen);
3266
3267int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3268{
3269        struct sock *sk = sock->sk;
3270        int err;
3271
3272        err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3273                               newsock);
3274        if (err < 0)
3275                goto done;
3276
3277        err = sock->ops->accept(sock, *newsock, flags);
3278        if (err < 0) {
3279                sock_release(*newsock);
3280                *newsock = NULL;
3281                goto done;
3282        }
3283
3284        (*newsock)->ops = sock->ops;
3285        __module_get((*newsock)->ops->owner);
3286
3287done:
3288        return err;
3289}
3290EXPORT_SYMBOL(kernel_accept);
3291
3292int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
3293                   int flags)
3294{
3295        return sock->ops->connect(sock, addr, addrlen, flags);
3296}
3297EXPORT_SYMBOL(kernel_connect);
3298
3299int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3300                         int *addrlen)
3301{
3302        return sock->ops->getname(sock, addr, addrlen, 0);
3303}
3304EXPORT_SYMBOL(kernel_getsockname);
3305
3306int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3307                         int *addrlen)
3308{
3309        return sock->ops->getname(sock, addr, addrlen, 1);
3310}
3311EXPORT_SYMBOL(kernel_getpeername);
3312
3313int kernel_getsockopt(struct socket *sock, int level, int optname,
3314                        char *optval, int *optlen)
3315{
3316        mm_segment_t oldfs = get_fs();
3317        char __user *uoptval;
3318        int __user *uoptlen;
3319        int err;
3320
3321        uoptval = (char __user __force *) optval;
3322        uoptlen = (int __user __force *) optlen;
3323
3324        set_fs(KERNEL_DS);
3325        if (level == SOL_SOCKET)
3326                err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
3327        else
3328                err = sock->ops->getsockopt(sock, level, optname, uoptval,
3329                                            uoptlen);
3330        set_fs(oldfs);
3331        return err;
3332}
3333EXPORT_SYMBOL(kernel_getsockopt);
3334
3335int kernel_setsockopt(struct socket *sock, int level, int optname,
3336                        char *optval, unsigned int optlen)
3337{
3338        mm_segment_t oldfs = get_fs();
3339        char __user *uoptval;
3340        int err;
3341
3342        uoptval = (char __user __force *) optval;
3343
3344        set_fs(KERNEL_DS);
3345        if (level == SOL_SOCKET)
3346                err = sock_setsockopt(sock, level, optname, uoptval, optlen);
3347        else
3348                err = sock->ops->setsockopt(sock, level, optname, uoptval,
3349                                            optlen);
3350        set_fs(oldfs);
3351        return err;
3352}
3353EXPORT_SYMBOL(kernel_setsockopt);
3354
3355int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3356                    size_t size, int flags)
3357{
3358        if (sock->ops->sendpage)
3359                return sock->ops->sendpage(sock, page, offset, size, flags);
3360
3361        return sock_no_sendpage(sock, page, offset, size, flags);
3362}
3363EXPORT_SYMBOL(kernel_sendpage);
3364
3365int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3366{
3367        mm_segment_t oldfs = get_fs();
3368        int err;
3369
3370        set_fs(KERNEL_DS);
3371        err = sock->ops->ioctl(sock, cmd, arg);
3372        set_fs(oldfs);
3373
3374        return err;
3375}
3376EXPORT_SYMBOL(kernel_sock_ioctl);
3377
3378int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3379{
3380        return sock->ops->shutdown(sock, how);
3381}
3382EXPORT_SYMBOL(kernel_sock_shutdown);
3383