linux/net/ipv4/ip_sockglue.c
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              The IP to API glue.
   7 *
   8 * Authors:     see ip.c
   9 *
  10 * Fixes:
  11 *              Many            :       Split from ip.c , see ip.c for history.
  12 *              Martin Mares    :       TOS setting fixed.
  13 *              Alan Cox        :       Fixed a couple of oopses in Martin's
  14 *                                      TOS tweaks.
  15 *              Mike McLagan    :       Routing by source
  16 */
  17
  18#include <linux/module.h>
  19#include <linux/types.h>
  20#include <linux/mm.h>
  21#include <linux/skbuff.h>
  22#include <linux/ip.h>
  23#include <linux/icmp.h>
  24#include <linux/inetdevice.h>
  25#include <linux/netdevice.h>
  26#include <linux/slab.h>
  27#include <net/sock.h>
  28#include <net/ip.h>
  29#include <net/icmp.h>
  30#include <net/tcp_states.h>
  31#include <linux/udp.h>
  32#include <linux/igmp.h>
  33#include <linux/netfilter.h>
  34#include <linux/route.h>
  35#include <linux/mroute.h>
  36#include <net/inet_ecn.h>
  37#include <net/route.h>
  38#include <net/xfrm.h>
  39#include <net/compat.h>
  40#include <net/checksum.h>
  41#if IS_ENABLED(CONFIG_IPV6)
  42#include <net/transp_v6.h>
  43#endif
  44#include <net/ip_fib.h>
  45
  46#include <linux/errqueue.h>
  47#include <asm/uaccess.h>
  48
  49/*
  50 *      SOL_IP control messages.
  51 */
  52
  53static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
  54{
  55        struct in_pktinfo info = *PKTINFO_SKB_CB(skb);
  56
  57        info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
  58
  59        put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
  60}
  61
  62static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
  63{
  64        int ttl = ip_hdr(skb)->ttl;
  65        put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
  66}
  67
  68static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
  69{
  70        put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos);
  71}
  72
  73static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
  74{
  75        if (IPCB(skb)->opt.optlen == 0)
  76                return;
  77
  78        put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen,
  79                 ip_hdr(skb) + 1);
  80}
  81
  82
  83static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
  84{
  85        unsigned char optbuf[sizeof(struct ip_options) + 40];
  86        struct ip_options *opt = (struct ip_options *)optbuf;
  87
  88        if (IPCB(skb)->opt.optlen == 0)
  89                return;
  90
  91        if (ip_options_echo(opt, skb)) {
  92                msg->msg_flags |= MSG_CTRUNC;
  93                return;
  94        }
  95        ip_options_undo(opt);
  96
  97        put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
  98}
  99
 100static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
 101                                  int tlen, int offset)
 102{
 103        __wsum csum = skb->csum;
 104
 105        if (skb->ip_summed != CHECKSUM_COMPLETE)
 106                return;
 107
 108        if (offset != 0)
 109                csum = csum_sub(csum,
 110                                csum_partial(skb_transport_header(skb) + tlen,
 111                                             offset, 0));
 112
 113        put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
 114}
 115
 116static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
 117{
 118        char *secdata;
 119        u32 seclen, secid;
 120        int err;
 121
 122        err = security_socket_getpeersec_dgram(NULL, skb, &secid);
 123        if (err)
 124                return;
 125
 126        err = security_secid_to_secctx(secid, &secdata, &seclen);
 127        if (err)
 128                return;
 129
 130        put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata);
 131        security_release_secctx(secdata, seclen);
 132}
 133
 134static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
 135{
 136        struct sockaddr_in sin;
 137        const struct iphdr *iph = ip_hdr(skb);
 138        __be16 *ports = (__be16 *)skb_transport_header(skb);
 139
 140        if (skb_transport_offset(skb) + 4 > skb->len)
 141                return;
 142
 143        /* All current transport protocols have the port numbers in the
 144         * first four bytes of the transport header and this function is
 145         * written with this assumption in mind.
 146         */
 147
 148        sin.sin_family = AF_INET;
 149        sin.sin_addr.s_addr = iph->daddr;
 150        sin.sin_port = ports[1];
 151        memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
 152
 153        put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
 154}
 155
 156void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb,
 157                         int tlen, int offset)
 158{
 159        struct inet_sock *inet = inet_sk(skb->sk);
 160        unsigned int flags = inet->cmsg_flags;
 161
 162        /* Ordered by supposed usage frequency */
 163        if (flags & IP_CMSG_PKTINFO) {
 164                ip_cmsg_recv_pktinfo(msg, skb);
 165
 166                flags &= ~IP_CMSG_PKTINFO;
 167                if (!flags)
 168                        return;
 169        }
 170
 171        if (flags & IP_CMSG_TTL) {
 172                ip_cmsg_recv_ttl(msg, skb);
 173
 174                flags &= ~IP_CMSG_TTL;
 175                if (!flags)
 176                        return;
 177        }
 178
 179        if (flags & IP_CMSG_TOS) {
 180                ip_cmsg_recv_tos(msg, skb);
 181
 182                flags &= ~IP_CMSG_TOS;
 183                if (!flags)
 184                        return;
 185        }
 186
 187        if (flags & IP_CMSG_RECVOPTS) {
 188                ip_cmsg_recv_opts(msg, skb);
 189
 190                flags &= ~IP_CMSG_RECVOPTS;
 191                if (!flags)
 192                        return;
 193        }
 194
 195        if (flags & IP_CMSG_RETOPTS) {
 196                ip_cmsg_recv_retopts(msg, skb);
 197
 198                flags &= ~IP_CMSG_RETOPTS;
 199                if (!flags)
 200                        return;
 201        }
 202
 203        if (flags & IP_CMSG_PASSSEC) {
 204                ip_cmsg_recv_security(msg, skb);
 205
 206                flags &= ~IP_CMSG_PASSSEC;
 207                if (!flags)
 208                        return;
 209        }
 210
 211        if (flags & IP_CMSG_ORIGDSTADDR) {
 212                ip_cmsg_recv_dstaddr(msg, skb);
 213
 214                flags &= ~IP_CMSG_ORIGDSTADDR;
 215                if (!flags)
 216                        return;
 217        }
 218
 219        if (flags & IP_CMSG_CHECKSUM)
 220                ip_cmsg_recv_checksum(msg, skb, tlen, offset);
 221}
 222EXPORT_SYMBOL(ip_cmsg_recv_offset);
 223
 224int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
 225                 bool allow_ipv6)
 226{
 227        int err, val;
 228        struct cmsghdr *cmsg;
 229        struct net *net = sock_net(sk);
 230
 231        for_each_cmsghdr(cmsg, msg) {
 232                if (!CMSG_OK(msg, cmsg))
 233                        return -EINVAL;
 234#if IS_ENABLED(CONFIG_IPV6)
 235                if (allow_ipv6 &&
 236                    cmsg->cmsg_level == SOL_IPV6 &&
 237                    cmsg->cmsg_type == IPV6_PKTINFO) {
 238                        struct in6_pktinfo *src_info;
 239
 240                        if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info)))
 241                                return -EINVAL;
 242                        src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
 243                        if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
 244                                return -EINVAL;
 245                        ipc->oif = src_info->ipi6_ifindex;
 246                        ipc->addr = src_info->ipi6_addr.s6_addr32[3];
 247                        continue;
 248                }
 249#endif
 250                if (cmsg->cmsg_level == SOL_SOCKET) {
 251                        err = __sock_cmsg_send(sk, msg, cmsg, &ipc->sockc);
 252                        if (err)
 253                                return err;
 254                        continue;
 255                }
 256
 257                if (cmsg->cmsg_level != SOL_IP)
 258                        continue;
 259                switch (cmsg->cmsg_type) {
 260                case IP_RETOPTS:
 261                        err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
 262
 263                        /* Our caller is responsible for freeing ipc->opt */
 264                        err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
 265                                             err < 40 ? err : 40);
 266                        if (err)
 267                                return err;
 268                        break;
 269                case IP_PKTINFO:
 270                {
 271                        struct in_pktinfo *info;
 272                        if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
 273                                return -EINVAL;
 274                        info = (struct in_pktinfo *)CMSG_DATA(cmsg);
 275                        ipc->oif = info->ipi_ifindex;
 276                        ipc->addr = info->ipi_spec_dst.s_addr;
 277                        break;
 278                }
 279                case IP_TTL:
 280                        if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
 281                                return -EINVAL;
 282                        val = *(int *)CMSG_DATA(cmsg);
 283                        if (val < 1 || val > 255)
 284                                return -EINVAL;
 285                        ipc->ttl = val;
 286                        break;
 287                case IP_TOS:
 288                        if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
 289                                val = *(int *)CMSG_DATA(cmsg);
 290                        else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
 291                                val = *(u8 *)CMSG_DATA(cmsg);
 292                        else
 293                                return -EINVAL;
 294                        if (val < 0 || val > 255)
 295                                return -EINVAL;
 296                        ipc->tos = val;
 297                        ipc->priority = rt_tos2priority(ipc->tos);
 298                        break;
 299
 300                default:
 301                        return -EINVAL;
 302                }
 303        }
 304        return 0;
 305}
 306
 307
/* Special input handler for packets caught by the router alert option.
 * They are selected only by protocol field and then processed like
 * local ones, but only if someone wants them! Otherwise a router
 * that is not running rsvpd would kill RSVP.
 *
 * What user level does with them is its own problem.
 * I have no idea how it will masquerade or NAT them (it is a joke, joke :-)),
 * but the receiver should be clever enough, e.g., to forward mtrace
 * requests sent to a multicast group so that they reach the destination
 * designated router.
 *
 * ip_ra_chain is the head of the RCU-managed list of entries added and
 * removed by ip_ra_control(); ip_ra_lock is held (with BHs disabled)
 * while that function walks and modifies the list.
 */
struct ip_ra_chain __rcu *ip_ra_chain;
static DEFINE_SPINLOCK(ip_ra_lock);
 320
 321
 322static void ip_ra_destroy_rcu(struct rcu_head *head)
 323{
 324        struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
 325
 326        sock_put(ra->saved_sk);
 327        kfree(ra);
 328}
 329
 330int ip_ra_control(struct sock *sk, unsigned char on,
 331                  void (*destructor)(struct sock *))
 332{
 333        struct ip_ra_chain *ra, *new_ra;
 334        struct ip_ra_chain __rcu **rap;
 335
 336        if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
 337                return -EINVAL;
 338
 339        new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
 340
 341        spin_lock_bh(&ip_ra_lock);
 342        for (rap = &ip_ra_chain;
 343             (ra = rcu_dereference_protected(*rap,
 344                        lockdep_is_held(&ip_ra_lock))) != NULL;
 345             rap = &ra->next) {
 346                if (ra->sk == sk) {
 347                        if (on) {
 348                                spin_unlock_bh(&ip_ra_lock);
 349                                kfree(new_ra);
 350                                return -EADDRINUSE;
 351                        }
 352                        /* dont let ip_call_ra_chain() use sk again */
 353                        ra->sk = NULL;
 354                        RCU_INIT_POINTER(*rap, ra->next);
 355                        spin_unlock_bh(&ip_ra_lock);
 356
 357                        if (ra->destructor)
 358                                ra->destructor(sk);
 359                        /*
 360                         * Delay sock_put(sk) and kfree(ra) after one rcu grace
 361                         * period. This guarantee ip_call_ra_chain() dont need
 362                         * to mess with socket refcounts.
 363                         */
 364                        ra->saved_sk = sk;
 365                        call_rcu(&ra->rcu, ip_ra_destroy_rcu);
 366                        return 0;
 367                }
 368        }
 369        if (!new_ra) {
 370                spin_unlock_bh(&ip_ra_lock);
 371                return -ENOBUFS;
 372        }
 373        new_ra->sk = sk;
 374        new_ra->destructor = destructor;
 375
 376        RCU_INIT_POINTER(new_ra->next, ra);
 377        rcu_assign_pointer(*rap, new_ra);
 378        sock_hold(sk);
 379        spin_unlock_bh(&ip_ra_lock);
 380
 381        return 0;
 382}
 383
 384void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 385                   __be16 port, u32 info, u8 *payload)
 386{
 387        struct sock_exterr_skb *serr;
 388
 389        skb = skb_clone(skb, GFP_ATOMIC);
 390        if (!skb)
 391                return;
 392
 393        serr = SKB_EXT_ERR(skb);
 394        serr->ee.ee_errno = err;
 395        serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
 396        serr->ee.ee_type = icmp_hdr(skb)->type;
 397        serr->ee.ee_code = icmp_hdr(skb)->code;
 398        serr->ee.ee_pad = 0;
 399        serr->ee.ee_info = info;
 400        serr->ee.ee_data = 0;
 401        serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) -
 402                                   skb_network_header(skb);
 403        serr->port = port;
 404
 405        if (skb_pull(skb, payload - skb->data)) {
 406                skb_reset_transport_header(skb);
 407                if (sock_queue_err_skb(sk, skb) == 0)
 408                        return;
 409        }
 410        kfree_skb(skb);
 411}
 412
 413void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
 414{
 415        struct inet_sock *inet = inet_sk(sk);
 416        struct sock_exterr_skb *serr;
 417        struct iphdr *iph;
 418        struct sk_buff *skb;
 419
 420        if (!inet->recverr)
 421                return;
 422
 423        skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC);
 424        if (!skb)
 425                return;
 426
 427        skb_put(skb, sizeof(struct iphdr));
 428        skb_reset_network_header(skb);
 429        iph = ip_hdr(skb);
 430        iph->daddr = daddr;
 431
 432        serr = SKB_EXT_ERR(skb);
 433        serr->ee.ee_errno = err;
 434        serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
 435        serr->ee.ee_type = 0;
 436        serr->ee.ee_code = 0;
 437        serr->ee.ee_pad = 0;
 438        serr->ee.ee_info = info;
 439        serr->ee.ee_data = 0;
 440        serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
 441        serr->port = port;
 442
 443        __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
 444        skb_reset_transport_header(skb);
 445
 446        if (sock_queue_err_skb(sk, skb))
 447                kfree_skb(skb);
 448}
 449
 450/* For some errors we have valid addr_offset even with zero payload and
 451 * zero port. Also, addr_offset should be supported if port is set.
 452 */
 453static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr)
 454{
 455        return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
 456               serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port;
 457}
 458
 459/* IPv4 supports cmsg on all imcp errors and some timestamps
 460 *
 461 * Timestamp code paths do not initialize the fields expected by cmsg:
 462 * the PKTINFO fields in skb->cb[]. Fill those in here.
 463 */
 464static bool ipv4_datagram_support_cmsg(const struct sock *sk,
 465                                       struct sk_buff *skb,
 466                                       int ee_origin)
 467{
 468        struct in_pktinfo *info;
 469
 470        if (ee_origin == SO_EE_ORIGIN_ICMP)
 471                return true;
 472
 473        if (ee_origin == SO_EE_ORIGIN_LOCAL)
 474                return false;
 475
 476        /* Support IP_PKTINFO on tstamp packets if requested, to correlate
 477         * timestamp with egress dev. Not possible for packets without dev
 478         * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
 479         */
 480        if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
 481            (!skb->dev))
 482                return false;
 483
 484        info = PKTINFO_SKB_CB(skb);
 485        info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
 486        info->ipi_ifindex = skb->dev->ifindex;
 487        return true;
 488}
 489
 490/*
 491 *      Handle MSG_ERRQUEUE
 492 */
 493int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 494{
 495        struct sock_exterr_skb *serr;
 496        struct sk_buff *skb;
 497        DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
 498        struct {
 499                struct sock_extended_err ee;
 500                struct sockaddr_in       offender;
 501        } errhdr;
 502        int err;
 503        int copied;
 504
 505        WARN_ON_ONCE(sk->sk_family == AF_INET6);
 506
 507        err = -EAGAIN;
 508        skb = sock_dequeue_err_skb(sk);
 509        if (!skb)
 510                goto out;
 511
 512        copied = skb->len;
 513        if (copied > len) {
 514                msg->msg_flags |= MSG_TRUNC;
 515                copied = len;
 516        }
 517        err = skb_copy_datagram_msg(skb, 0, msg, copied);
 518        if (unlikely(err)) {
 519                kfree_skb(skb);
 520                return err;
 521        }
 522        sock_recv_timestamp(msg, sk, skb);
 523
 524        serr = SKB_EXT_ERR(skb);
 525
 526        if (sin && ipv4_datagram_support_addr(serr)) {
 527                sin->sin_family = AF_INET;
 528                sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
 529                                                   serr->addr_offset);
 530                sin->sin_port = serr->port;
 531                memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
 532                *addr_len = sizeof(*sin);
 533        }
 534
 535        memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
 536        sin = &errhdr.offender;
 537        memset(sin, 0, sizeof(*sin));
 538
 539        if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
 540                sin->sin_family = AF_INET;
 541                sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 542                if (inet_sk(sk)->cmsg_flags)
 543                        ip_cmsg_recv(msg, skb);
 544        }
 545
 546        put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr);
 547
 548        /* Now we could try to dump offended packet options */
 549
 550        msg->msg_flags |= MSG_ERRQUEUE;
 551        err = copied;
 552
 553        consume_skb(skb);
 554out:
 555        return err;
 556}
 557
 558
 559/*
 560 *      Socket option code for IP. This is the end of the line after any
 561 *      TCP,UDP etc options on an IP socket.
 562 */
 563static bool setsockopt_needs_rtnl(int optname)
 564{
 565        switch (optname) {
 566        case IP_ADD_MEMBERSHIP:
 567        case IP_ADD_SOURCE_MEMBERSHIP:
 568        case IP_BLOCK_SOURCE:
 569        case IP_DROP_MEMBERSHIP:
 570        case IP_DROP_SOURCE_MEMBERSHIP:
 571        case IP_MSFILTER:
 572        case IP_UNBLOCK_SOURCE:
 573        case MCAST_BLOCK_SOURCE:
 574        case MCAST_MSFILTER:
 575        case MCAST_JOIN_GROUP:
 576        case MCAST_JOIN_SOURCE_GROUP:
 577        case MCAST_LEAVE_GROUP:
 578        case MCAST_LEAVE_SOURCE_GROUP:
 579        case MCAST_UNBLOCK_SOURCE:
 580                return true;
 581        }
 582        return false;
 583}
 584
 585static int do_ip_setsockopt(struct sock *sk, int level,
 586                            int optname, char __user *optval, unsigned int optlen)
 587{
 588        struct inet_sock *inet = inet_sk(sk);
 589        struct net *net = sock_net(sk);
 590        int val = 0, err;
 591        bool needs_rtnl = setsockopt_needs_rtnl(optname);
 592
 593        switch (optname) {
 594        case IP_PKTINFO:
 595        case IP_RECVTTL:
 596        case IP_RECVOPTS:
 597        case IP_RECVTOS:
 598        case IP_RETOPTS:
 599        case IP_TOS:
 600        case IP_TTL:
 601        case IP_HDRINCL:
 602        case IP_MTU_DISCOVER:
 603        case IP_RECVERR:
 604        case IP_ROUTER_ALERT:
 605        case IP_FREEBIND:
 606        case IP_PASSSEC:
 607        case IP_TRANSPARENT:
 608        case IP_MINTTL:
 609        case IP_NODEFRAG:
 610        case IP_BIND_ADDRESS_NO_PORT:
 611        case IP_UNICAST_IF:
 612        case IP_MULTICAST_TTL:
 613        case IP_MULTICAST_ALL:
 614        case IP_MULTICAST_LOOP:
 615        case IP_RECVORIGDSTADDR:
 616        case IP_CHECKSUM:
 617                if (optlen >= sizeof(int)) {
 618                        if (get_user(val, (int __user *) optval))
 619                                return -EFAULT;
 620                } else if (optlen >= sizeof(char)) {
 621                        unsigned char ucval;
 622
 623                        if (get_user(ucval, (unsigned char __user *) optval))
 624                                return -EFAULT;
 625                        val = (int) ucval;
 626                }
 627        }
 628
 629        /* If optlen==0, it is equivalent to val == 0 */
 630
 631        if (ip_mroute_opt(optname))
 632                return ip_mroute_setsockopt(sk, optname, optval, optlen);
 633
 634        err = 0;
 635        if (needs_rtnl)
 636                rtnl_lock();
 637        lock_sock(sk);
 638
 639        switch (optname) {
 640        case IP_OPTIONS:
 641        {
 642                struct ip_options_rcu *old, *opt = NULL;
 643
 644                if (optlen > 40)
 645                        goto e_inval;
 646                err = ip_options_get_from_user(sock_net(sk), &opt,
 647                                               optval, optlen);
 648                if (err)
 649                        break;
 650                old = rcu_dereference_protected(inet->inet_opt,
 651                                                lockdep_sock_is_held(sk));
 652                if (inet->is_icsk) {
 653                        struct inet_connection_sock *icsk = inet_csk(sk);
 654#if IS_ENABLED(CONFIG_IPV6)
 655                        if (sk->sk_family == PF_INET ||
 656                            (!((1 << sk->sk_state) &
 657                               (TCPF_LISTEN | TCPF_CLOSE)) &&
 658                             inet->inet_daddr != LOOPBACK4_IPV6)) {
 659#endif
 660                                if (old)
 661                                        icsk->icsk_ext_hdr_len -= old->opt.optlen;
 662                                if (opt)
 663                                        icsk->icsk_ext_hdr_len += opt->opt.optlen;
 664                                icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 665#if IS_ENABLED(CONFIG_IPV6)
 666                        }
 667#endif
 668                }
 669                rcu_assign_pointer(inet->inet_opt, opt);
 670                if (old)
 671                        kfree_rcu(old, rcu);
 672                break;
 673        }
 674        case IP_PKTINFO:
 675                if (val)
 676                        inet->cmsg_flags |= IP_CMSG_PKTINFO;
 677                else
 678                        inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
 679                break;
 680        case IP_RECVTTL:
 681                if (val)
 682                        inet->cmsg_flags |=  IP_CMSG_TTL;
 683                else
 684                        inet->cmsg_flags &= ~IP_CMSG_TTL;
 685                break;
 686        case IP_RECVTOS:
 687                if (val)
 688                        inet->cmsg_flags |=  IP_CMSG_TOS;
 689                else
 690                        inet->cmsg_flags &= ~IP_CMSG_TOS;
 691                break;
 692        case IP_RECVOPTS:
 693                if (val)
 694                        inet->cmsg_flags |=  IP_CMSG_RECVOPTS;
 695                else
 696                        inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
 697                break;
 698        case IP_RETOPTS:
 699                if (val)
 700                        inet->cmsg_flags |= IP_CMSG_RETOPTS;
 701                else
 702                        inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
 703                break;
 704        case IP_PASSSEC:
 705                if (val)
 706                        inet->cmsg_flags |= IP_CMSG_PASSSEC;
 707                else
 708                        inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
 709                break;
 710        case IP_RECVORIGDSTADDR:
 711                if (val)
 712                        inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR;
 713                else
 714                        inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
 715                break;
 716        case IP_CHECKSUM:
 717                if (val) {
 718                        if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) {
 719                                inet_inc_convert_csum(sk);
 720                                inet->cmsg_flags |= IP_CMSG_CHECKSUM;
 721                        }
 722                } else {
 723                        if (inet->cmsg_flags & IP_CMSG_CHECKSUM) {
 724                                inet_dec_convert_csum(sk);
 725                                inet->cmsg_flags &= ~IP_CMSG_CHECKSUM;
 726                        }
 727                }
 728                break;
 729        case IP_TOS:    /* This sets both TOS and Precedence */
 730                if (sk->sk_type == SOCK_STREAM) {
 731                        val &= ~INET_ECN_MASK;
 732                        val |= inet->tos & INET_ECN_MASK;
 733                }
 734                if (inet->tos != val) {
 735                        inet->tos = val;
 736                        sk->sk_priority = rt_tos2priority(val);
 737                        sk_dst_reset(sk);
 738                }
 739                break;
 740        case IP_TTL:
 741                if (optlen < 1)
 742                        goto e_inval;
 743                if (val != -1 && (val < 1 || val > 255))
 744                        goto e_inval;
 745                inet->uc_ttl = val;
 746                break;
 747        case IP_HDRINCL:
 748                if (sk->sk_type != SOCK_RAW) {
 749                        err = -ENOPROTOOPT;
 750                        break;
 751                }
 752                inet->hdrincl = val ? 1 : 0;
 753                break;
 754        case IP_NODEFRAG:
 755                if (sk->sk_type != SOCK_RAW) {
 756                        err = -ENOPROTOOPT;
 757                        break;
 758                }
 759                inet->nodefrag = val ? 1 : 0;
 760                break;
 761        case IP_BIND_ADDRESS_NO_PORT:
 762                inet->bind_address_no_port = val ? 1 : 0;
 763                break;
 764        case IP_MTU_DISCOVER:
 765                if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
 766                        goto e_inval;
 767                inet->pmtudisc = val;
 768                break;
 769        case IP_RECVERR:
 770                inet->recverr = !!val;
 771                if (!val)
 772                        skb_queue_purge(&sk->sk_error_queue);
 773                break;
 774        case IP_MULTICAST_TTL:
 775                if (sk->sk_type == SOCK_STREAM)
 776                        goto e_inval;
 777                if (optlen < 1)
 778                        goto e_inval;
 779                if (val == -1)
 780                        val = 1;
 781                if (val < 0 || val > 255)
 782                        goto e_inval;
 783                inet->mc_ttl = val;
 784                break;
 785        case IP_MULTICAST_LOOP:
 786                if (optlen < 1)
 787                        goto e_inval;
 788                inet->mc_loop = !!val;
 789                break;
 790        case IP_UNICAST_IF:
 791        {
 792                struct net_device *dev = NULL;
 793                int ifindex;
 794
 795                if (optlen != sizeof(int))
 796                        goto e_inval;
 797
 798                ifindex = (__force int)ntohl((__force __be32)val);
 799                if (ifindex == 0) {
 800                        inet->uc_index = 0;
 801                        err = 0;
 802                        break;
 803                }
 804
 805                dev = dev_get_by_index(sock_net(sk), ifindex);
 806                err = -EADDRNOTAVAIL;
 807                if (!dev)
 808                        break;
 809                dev_put(dev);
 810
 811                err = -EINVAL;
 812                if (sk->sk_bound_dev_if)
 813                        break;
 814
 815                inet->uc_index = ifindex;
 816                err = 0;
 817                break;
 818        }
 819        case IP_MULTICAST_IF:
 820        {
 821                struct ip_mreqn mreq;
 822                struct net_device *dev = NULL;
 823
 824                if (sk->sk_type == SOCK_STREAM)
 825                        goto e_inval;
 826                /*
 827                 *      Check the arguments are allowable
 828                 */
 829
 830                if (optlen < sizeof(struct in_addr))
 831                        goto e_inval;
 832
 833                err = -EFAULT;
 834                if (optlen >= sizeof(struct ip_mreqn)) {
 835                        if (copy_from_user(&mreq, optval, sizeof(mreq)))
 836                                break;
 837                } else {
 838                        memset(&mreq, 0, sizeof(mreq));
 839                        if (optlen >= sizeof(struct ip_mreq)) {
 840                                if (copy_from_user(&mreq, optval,
 841                                                   sizeof(struct ip_mreq)))
 842                                        break;
 843                        } else if (optlen >= sizeof(struct in_addr)) {
 844                                if (copy_from_user(&mreq.imr_address, optval,
 845                                                   sizeof(struct in_addr)))
 846                                        break;
 847                        }
 848                }
 849
 850                if (!mreq.imr_ifindex) {
 851                        if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) {
 852                                inet->mc_index = 0;
 853                                inet->mc_addr  = 0;
 854                                err = 0;
 855                                break;
 856                        }
 857                        dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
 858                        if (dev)
 859                                mreq.imr_ifindex = dev->ifindex;
 860                } else
 861                        dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
 862
 863
 864                err = -EADDRNOTAVAIL;
 865                if (!dev)
 866                        break;
 867                dev_put(dev);
 868
 869                err = -EINVAL;
 870                if (sk->sk_bound_dev_if &&
 871                    mreq.imr_ifindex != sk->sk_bound_dev_if)
 872                        break;
 873
 874                inet->mc_index = mreq.imr_ifindex;
 875                inet->mc_addr  = mreq.imr_address.s_addr;
 876                err = 0;
 877                break;
 878        }
 879
 880        case IP_ADD_MEMBERSHIP:
 881        case IP_DROP_MEMBERSHIP:
 882        {
 883                struct ip_mreqn mreq;
 884
 885                err = -EPROTO;
 886                if (inet_sk(sk)->is_icsk)
 887                        break;
 888
 889                if (optlen < sizeof(struct ip_mreq))
 890                        goto e_inval;
 891                err = -EFAULT;
 892                if (optlen >= sizeof(struct ip_mreqn)) {
 893                        if (copy_from_user(&mreq, optval, sizeof(mreq)))
 894                                break;
 895                } else {
 896                        memset(&mreq, 0, sizeof(mreq));
 897                        if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq)))
 898                                break;
 899                }
 900
 901                if (optname == IP_ADD_MEMBERSHIP)
 902                        err = ip_mc_join_group(sk, &mreq);
 903                else
 904                        err = ip_mc_leave_group(sk, &mreq);
 905                break;
 906        }
 907        case IP_MSFILTER:
 908        {
 909                struct ip_msfilter *msf;
 910
 911                if (optlen < IP_MSFILTER_SIZE(0))
 912                        goto e_inval;
 913                if (optlen > sysctl_optmem_max) {
 914                        err = -ENOBUFS;
 915                        break;
 916                }
 917                msf = kmalloc(optlen, GFP_KERNEL);
 918                if (!msf) {
 919                        err = -ENOBUFS;
 920                        break;
 921                }
 922                err = -EFAULT;
 923                if (copy_from_user(msf, optval, optlen)) {
 924                        kfree(msf);
 925                        break;
 926                }
 927                /* numsrc >= (1G-4) overflow in 32 bits */
 928                if (msf->imsf_numsrc >= 0x3ffffffcU ||
 929                    msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
 930                        kfree(msf);
 931                        err = -ENOBUFS;
 932                        break;
 933                }
 934                if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
 935                        kfree(msf);
 936                        err = -EINVAL;
 937                        break;
 938                }
 939                err = ip_mc_msfilter(sk, msf, 0);
 940                kfree(msf);
 941                break;
 942        }
 943        case IP_BLOCK_SOURCE:
 944        case IP_UNBLOCK_SOURCE:
 945        case IP_ADD_SOURCE_MEMBERSHIP:
 946        case IP_DROP_SOURCE_MEMBERSHIP:
 947        {
 948                struct ip_mreq_source mreqs;
 949                int omode, add;
 950
 951                if (optlen != sizeof(struct ip_mreq_source))
 952                        goto e_inval;
 953                if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
 954                        err = -EFAULT;
 955                        break;
 956                }
 957                if (optname == IP_BLOCK_SOURCE) {
 958                        omode = MCAST_EXCLUDE;
 959                        add = 1;
 960                } else if (optname == IP_UNBLOCK_SOURCE) {
 961                        omode = MCAST_EXCLUDE;
 962                        add = 0;
 963                } else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
 964                        struct ip_mreqn mreq;
 965
 966                        mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
 967                        mreq.imr_address.s_addr = mreqs.imr_interface;
 968                        mreq.imr_ifindex = 0;
 969                        err = ip_mc_join_group(sk, &mreq);
 970                        if (err && err != -EADDRINUSE)
 971                                break;
 972                        omode = MCAST_INCLUDE;
 973                        add = 1;
 974                } else /* IP_DROP_SOURCE_MEMBERSHIP */ {
 975                        omode = MCAST_INCLUDE;
 976                        add = 0;
 977                }
 978                err = ip_mc_source(add, omode, sk, &mreqs, 0);
 979                break;
 980        }
 981        case MCAST_JOIN_GROUP:
 982        case MCAST_LEAVE_GROUP:
 983        {
 984                struct group_req greq;
 985                struct sockaddr_in *psin;
 986                struct ip_mreqn mreq;
 987
 988                if (optlen < sizeof(struct group_req))
 989                        goto e_inval;
 990                err = -EFAULT;
 991                if (copy_from_user(&greq, optval, sizeof(greq)))
 992                        break;
 993                psin = (struct sockaddr_in *)&greq.gr_group;
 994                if (psin->sin_family != AF_INET)
 995                        goto e_inval;
 996                memset(&mreq, 0, sizeof(mreq));
 997                mreq.imr_multiaddr = psin->sin_addr;
 998                mreq.imr_ifindex = greq.gr_interface;
 999
1000                if (optname == MCAST_JOIN_GROUP)
1001                        err = ip_mc_join_group(sk, &mreq);
1002                else
1003                        err = ip_mc_leave_group(sk, &mreq);
1004                break;
1005        }
1006        case MCAST_JOIN_SOURCE_GROUP:
1007        case MCAST_LEAVE_SOURCE_GROUP:
1008        case MCAST_BLOCK_SOURCE:
1009        case MCAST_UNBLOCK_SOURCE:
1010        {
1011                struct group_source_req greqs;
1012                struct ip_mreq_source mreqs;
1013                struct sockaddr_in *psin;
1014                int omode, add;
1015
1016                if (optlen != sizeof(struct group_source_req))
1017                        goto e_inval;
1018                if (copy_from_user(&greqs, optval, sizeof(greqs))) {
1019                        err = -EFAULT;
1020                        break;
1021                }
1022                if (greqs.gsr_group.ss_family != AF_INET ||
1023                    greqs.gsr_source.ss_family != AF_INET) {
1024                        err = -EADDRNOTAVAIL;
1025                        break;
1026                }
1027                psin = (struct sockaddr_in *)&greqs.gsr_group;
1028                mreqs.imr_multiaddr = psin->sin_addr.s_addr;
1029                psin = (struct sockaddr_in *)&greqs.gsr_source;
1030                mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
1031                mreqs.imr_interface = 0; /* use index for mc_source */
1032
1033                if (optname == MCAST_BLOCK_SOURCE) {
1034                        omode = MCAST_EXCLUDE;
1035                        add = 1;
1036                } else if (optname == MCAST_UNBLOCK_SOURCE) {
1037                        omode = MCAST_EXCLUDE;
1038                        add = 0;
1039                } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
1040                        struct ip_mreqn mreq;
1041
1042                        psin = (struct sockaddr_in *)&greqs.gsr_group;
1043                        mreq.imr_multiaddr = psin->sin_addr;
1044                        mreq.imr_address.s_addr = 0;
1045                        mreq.imr_ifindex = greqs.gsr_interface;
1046                        err = ip_mc_join_group(sk, &mreq);
1047                        if (err && err != -EADDRINUSE)
1048                                break;
1049                        greqs.gsr_interface = mreq.imr_ifindex;
1050                        omode = MCAST_INCLUDE;
1051                        add = 1;
1052                } else /* MCAST_LEAVE_SOURCE_GROUP */ {
1053                        omode = MCAST_INCLUDE;
1054                        add = 0;
1055                }
1056                err = ip_mc_source(add, omode, sk, &mreqs,
1057                                   greqs.gsr_interface);
1058                break;
1059        }
1060        case MCAST_MSFILTER:
1061        {
1062                struct sockaddr_in *psin;
1063                struct ip_msfilter *msf = NULL;
1064                struct group_filter *gsf = NULL;
1065                int msize, i, ifindex;
1066
1067                if (optlen < GROUP_FILTER_SIZE(0))
1068                        goto e_inval;
1069                if (optlen > sysctl_optmem_max) {
1070                        err = -ENOBUFS;
1071                        break;
1072                }
1073                gsf = kmalloc(optlen, GFP_KERNEL);
1074                if (!gsf) {
1075                        err = -ENOBUFS;
1076                        break;
1077                }
1078                err = -EFAULT;
1079                if (copy_from_user(gsf, optval, optlen))
1080                        goto mc_msf_out;
1081
1082                /* numsrc >= (4G-140)/128 overflow in 32 bits */
1083                if (gsf->gf_numsrc >= 0x1ffffff ||
1084                    gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
1085                        err = -ENOBUFS;
1086                        goto mc_msf_out;
1087                }
1088                if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
1089                        err = -EINVAL;
1090                        goto mc_msf_out;
1091                }
1092                msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
1093                msf = kmalloc(msize, GFP_KERNEL);
1094                if (!msf) {
1095                        err = -ENOBUFS;
1096                        goto mc_msf_out;
1097                }
1098                ifindex = gsf->gf_interface;
1099                psin = (struct sockaddr_in *)&gsf->gf_group;
1100                if (psin->sin_family != AF_INET) {
1101                        err = -EADDRNOTAVAIL;
1102                        goto mc_msf_out;
1103                }
1104                msf->imsf_multiaddr = psin->sin_addr.s_addr;
1105                msf->imsf_interface = 0;
1106                msf->imsf_fmode = gsf->gf_fmode;
1107                msf->imsf_numsrc = gsf->gf_numsrc;
1108                err = -EADDRNOTAVAIL;
1109                for (i = 0; i < gsf->gf_numsrc; ++i) {
1110                        psin = (struct sockaddr_in *)&gsf->gf_slist[i];
1111
1112                        if (psin->sin_family != AF_INET)
1113                                goto mc_msf_out;
1114                        msf->imsf_slist[i] = psin->sin_addr.s_addr;
1115                }
1116                kfree(gsf);
1117                gsf = NULL;
1118
1119                err = ip_mc_msfilter(sk, msf, ifindex);
1120mc_msf_out:
1121                kfree(msf);
1122                kfree(gsf);
1123                break;
1124        }
1125        case IP_MULTICAST_ALL:
1126                if (optlen < 1)
1127                        goto e_inval;
1128                if (val != 0 && val != 1)
1129                        goto e_inval;
1130                inet->mc_all = val;
1131                break;
1132        case IP_ROUTER_ALERT:
1133                err = ip_ra_control(sk, val ? 1 : 0, NULL);
1134                break;
1135
1136        case IP_FREEBIND:
1137                if (optlen < 1)
1138                        goto e_inval;
1139                inet->freebind = !!val;
1140                break;
1141
1142        case IP_IPSEC_POLICY:
1143        case IP_XFRM_POLICY:
1144                err = -EPERM;
1145                if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1146                        break;
1147                err = xfrm_user_policy(sk, optname, optval, optlen);
1148                break;
1149
1150        case IP_TRANSPARENT:
1151                if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
1152                    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1153                        err = -EPERM;
1154                        break;
1155                }
1156                if (optlen < 1)
1157                        goto e_inval;
1158                inet->transparent = !!val;
1159                break;
1160
1161        case IP_MINTTL:
1162                if (optlen < 1)
1163                        goto e_inval;
1164                if (val < 0 || val > 255)
1165                        goto e_inval;
1166                inet->min_ttl = val;
1167                break;
1168
1169        default:
1170                err = -ENOPROTOOPT;
1171                break;
1172        }
1173        release_sock(sk);
1174        if (needs_rtnl)
1175                rtnl_unlock();
1176        return err;
1177
1178e_inval:
1179        release_sock(sk);
1180        if (needs_rtnl)
1181                rtnl_unlock();
1182        return -EINVAL;
1183}
1184
1185/**
1186 * ipv4_pktinfo_prepare - transfer some info from rtable to skb
1187 * @sk: socket
1188 * @skb: buffer
1189 *
1190 * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
1191 * destination in skb->cb[] before dst drop.
1192 * This way, receiver doesn't make cache line misses to read rtable.
1193 */
1194void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
1195{
1196        struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
1197        bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
1198                       ipv6_sk_rxinfo(sk);
1199
1200        if (prepare && skb_rtable(skb)) {
1201                /* skb->cb is overloaded: prior to this point it is IP{6}CB
1202                 * which has interface index (iif) as the first member of the
1203                 * underlying inet{6}_skb_parm struct. This code then overlays
1204                 * PKTINFO_SKB_CB and in_pktinfo also has iif as the first
1205                 * element so the iif is picked up from the prior IPCB
1206                 */
1207                pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
1208        } else {
1209                pktinfo->ipi_ifindex = 0;
1210                pktinfo->ipi_spec_dst.s_addr = 0;
1211        }
1212        skb_dst_drop(skb);
1213}
1214
1215int ip_setsockopt(struct sock *sk, int level,
1216                int optname, char __user *optval, unsigned int optlen)
1217{
1218        int err;
1219
1220        if (level != SOL_IP)
1221                return -ENOPROTOOPT;
1222
1223        err = do_ip_setsockopt(sk, level, optname, optval, optlen);
1224#ifdef CONFIG_NETFILTER
1225        /* we need to exclude all possible ENOPROTOOPTs except default case */
1226        if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
1227                        optname != IP_IPSEC_POLICY &&
1228                        optname != IP_XFRM_POLICY &&
1229                        !ip_mroute_opt(optname)) {
1230                lock_sock(sk);
1231                err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
1232                release_sock(sk);
1233        }
1234#endif
1235        return err;
1236}
1237EXPORT_SYMBOL(ip_setsockopt);
1238
1239#ifdef CONFIG_COMPAT
1240int compat_ip_setsockopt(struct sock *sk, int level, int optname,
1241                         char __user *optval, unsigned int optlen)
1242{
1243        int err;
1244
1245        if (level != SOL_IP)
1246                return -ENOPROTOOPT;
1247
1248        if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER)
1249                return compat_mc_setsockopt(sk, level, optname, optval, optlen,
1250                        ip_setsockopt);
1251
1252        err = do_ip_setsockopt(sk, level, optname, optval, optlen);
1253#ifdef CONFIG_NETFILTER
1254        /* we need to exclude all possible ENOPROTOOPTs except default case */
1255        if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
1256                        optname != IP_IPSEC_POLICY &&
1257                        optname != IP_XFRM_POLICY &&
1258                        !ip_mroute_opt(optname)) {
1259                lock_sock(sk);
1260                err = compat_nf_setsockopt(sk, PF_INET, optname,
1261                                           optval, optlen);
1262                release_sock(sk);
1263        }
1264#endif
1265        return err;
1266}
1267EXPORT_SYMBOL(compat_ip_setsockopt);
1268#endif
1269
1270/*
1271 *      Get the options. Note for future reference. The GET of IP options gets
1272 *      the _received_ ones. The set sets the _sent_ ones.
1273 */
1274
/* Tell whether reading @optname requires the RTNL lock to be held:
 * true for the two multicast source-filter options, false otherwise.
 */
static bool getsockopt_needs_rtnl(int optname)
{
	return optname == IP_MSFILTER || optname == MCAST_MSFILTER;
}
1284
1285static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1286                            char __user *optval, int __user *optlen, unsigned int flags)
1287{
1288        struct inet_sock *inet = inet_sk(sk);
1289        bool needs_rtnl = getsockopt_needs_rtnl(optname);
1290        int val, err = 0;
1291        int len;
1292
1293        if (level != SOL_IP)
1294                return -EOPNOTSUPP;
1295
1296        if (ip_mroute_opt(optname))
1297                return ip_mroute_getsockopt(sk, optname, optval, optlen);
1298
1299        if (get_user(len, optlen))
1300                return -EFAULT;
1301        if (len < 0)
1302                return -EINVAL;
1303
1304        if (needs_rtnl)
1305                rtnl_lock();
1306        lock_sock(sk);
1307
1308        switch (optname) {
1309        case IP_OPTIONS:
1310        {
1311                unsigned char optbuf[sizeof(struct ip_options)+40];
1312                struct ip_options *opt = (struct ip_options *)optbuf;
1313                struct ip_options_rcu *inet_opt;
1314
1315                inet_opt = rcu_dereference_protected(inet->inet_opt,
1316                                                     lockdep_sock_is_held(sk));
1317                opt->optlen = 0;
1318                if (inet_opt)
1319                        memcpy(optbuf, &inet_opt->opt,
1320                               sizeof(struct ip_options) +
1321                               inet_opt->opt.optlen);
1322                release_sock(sk);
1323
1324                if (opt->optlen == 0)
1325                        return put_user(0, optlen);
1326
1327                ip_options_undo(opt);
1328
1329                len = min_t(unsigned int, len, opt->optlen);
1330                if (put_user(len, optlen))
1331                        return -EFAULT;
1332                if (copy_to_user(optval, opt->__data, len))
1333                        return -EFAULT;
1334                return 0;
1335        }
1336        case IP_PKTINFO:
1337                val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
1338                break;
1339        case IP_RECVTTL:
1340                val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
1341                break;
1342        case IP_RECVTOS:
1343                val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
1344                break;
1345        case IP_RECVOPTS:
1346                val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
1347                break;
1348        case IP_RETOPTS:
1349                val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
1350                break;
1351        case IP_PASSSEC:
1352                val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
1353                break;
1354        case IP_RECVORIGDSTADDR:
1355                val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0;
1356                break;
1357        case IP_CHECKSUM:
1358                val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
1359                break;
1360        case IP_TOS:
1361                val = inet->tos;
1362                break;
1363        case IP_TTL:
1364        {
1365                struct net *net = sock_net(sk);
1366                val = (inet->uc_ttl == -1 ?
1367                       net->ipv4.sysctl_ip_default_ttl :
1368                       inet->uc_ttl);
1369                break;
1370        }
1371        case IP_HDRINCL:
1372                val = inet->hdrincl;
1373                break;
1374        case IP_NODEFRAG:
1375                val = inet->nodefrag;
1376                break;
1377        case IP_BIND_ADDRESS_NO_PORT:
1378                val = inet->bind_address_no_port;
1379                break;
1380        case IP_MTU_DISCOVER:
1381                val = inet->pmtudisc;
1382                break;
1383        case IP_MTU:
1384        {
1385                struct dst_entry *dst;
1386                val = 0;
1387                dst = sk_dst_get(sk);
1388                if (dst) {
1389                        val = dst_mtu(dst);
1390                        dst_release(dst);
1391                }
1392                if (!val) {
1393                        release_sock(sk);
1394                        return -ENOTCONN;
1395                }
1396                break;
1397        }
1398        case IP_RECVERR:
1399                val = inet->recverr;
1400                break;
1401        case IP_MULTICAST_TTL:
1402                val = inet->mc_ttl;
1403                break;
1404        case IP_MULTICAST_LOOP:
1405                val = inet->mc_loop;
1406                break;
1407        case IP_UNICAST_IF:
1408                val = (__force int)htonl((__u32) inet->uc_index);
1409                break;
1410        case IP_MULTICAST_IF:
1411        {
1412                struct in_addr addr;
1413                len = min_t(unsigned int, len, sizeof(struct in_addr));
1414                addr.s_addr = inet->mc_addr;
1415                release_sock(sk);
1416
1417                if (put_user(len, optlen))
1418                        return -EFAULT;
1419                if (copy_to_user(optval, &addr, len))
1420                        return -EFAULT;
1421                return 0;
1422        }
1423        case IP_MSFILTER:
1424        {
1425                struct ip_msfilter msf;
1426
1427                if (len < IP_MSFILTER_SIZE(0)) {
1428                        err = -EINVAL;
1429                        goto out;
1430                }
1431                if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
1432                        err = -EFAULT;
1433                        goto out;
1434                }
1435                err = ip_mc_msfget(sk, &msf,
1436                                   (struct ip_msfilter __user *)optval, optlen);
1437                goto out;
1438        }
1439        case MCAST_MSFILTER:
1440        {
1441                struct group_filter gsf;
1442
1443                if (len < GROUP_FILTER_SIZE(0)) {
1444                        err = -EINVAL;
1445                        goto out;
1446                }
1447                if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
1448                        err = -EFAULT;
1449                        goto out;
1450                }
1451                err = ip_mc_gsfget(sk, &gsf,
1452                                   (struct group_filter __user *)optval,
1453                                   optlen);
1454                goto out;
1455        }
1456        case IP_MULTICAST_ALL:
1457                val = inet->mc_all;
1458                break;
1459        case IP_PKTOPTIONS:
1460        {
1461                struct msghdr msg;
1462
1463                release_sock(sk);
1464
1465                if (sk->sk_type != SOCK_STREAM)
1466                        return -ENOPROTOOPT;
1467
1468                msg.msg_control = (__force void *) optval;
1469                msg.msg_controllen = len;
1470                msg.msg_flags = flags;
1471
1472                if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
1473                        struct in_pktinfo info;
1474
1475                        info.ipi_addr.s_addr = inet->inet_rcv_saddr;
1476                        info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr;
1477                        info.ipi_ifindex = inet->mc_index;
1478                        put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
1479                }
1480                if (inet->cmsg_flags & IP_CMSG_TTL) {
1481                        int hlim = inet->mc_ttl;
1482                        put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
1483                }
1484                if (inet->cmsg_flags & IP_CMSG_TOS) {
1485                        int tos = inet->rcv_tos;
1486                        put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
1487                }
1488                len -= msg.msg_controllen;
1489                return put_user(len, optlen);
1490        }
1491        case IP_FREEBIND:
1492                val = inet->freebind;
1493                break;
1494        case IP_TRANSPARENT:
1495                val = inet->transparent;
1496                break;
1497        case IP_MINTTL:
1498                val = inet->min_ttl;
1499                break;
1500        default:
1501                release_sock(sk);
1502                return -ENOPROTOOPT;
1503        }
1504        release_sock(sk);
1505
1506        if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
1507                unsigned char ucval = (unsigned char)val;
1508                len = 1;
1509                if (put_user(len, optlen))
1510                        return -EFAULT;
1511                if (copy_to_user(optval, &ucval, 1))
1512                        return -EFAULT;
1513        } else {
1514                len = min_t(unsigned int, sizeof(int), len);
1515                if (put_user(len, optlen))
1516                        return -EFAULT;
1517                if (copy_to_user(optval, &val, len))
1518                        return -EFAULT;
1519        }
1520        return 0;
1521
1522out:
1523        release_sock(sk);
1524        if (needs_rtnl)
1525                rtnl_unlock();
1526        return err;
1527}
1528
1529int ip_getsockopt(struct sock *sk, int level,
1530                  int optname, char __user *optval, int __user *optlen)
1531{
1532        int err;
1533
1534        err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
1535#ifdef CONFIG_NETFILTER
1536        /* we need to exclude all possible ENOPROTOOPTs except default case */
1537        if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
1538                        !ip_mroute_opt(optname)) {
1539                int len;
1540
1541                if (get_user(len, optlen))
1542                        return -EFAULT;
1543
1544                lock_sock(sk);
1545                err = nf_getsockopt(sk, PF_INET, optname, optval,
1546                                &len);
1547                release_sock(sk);
1548                if (err >= 0)
1549                        err = put_user(len, optlen);
1550                return err;
1551        }
1552#endif
1553        return err;
1554}
1555EXPORT_SYMBOL(ip_getsockopt);
1556
1557#ifdef CONFIG_COMPAT
1558int compat_ip_getsockopt(struct sock *sk, int level, int optname,
1559                         char __user *optval, int __user *optlen)
1560{
1561        int err;
1562
1563        if (optname == MCAST_MSFILTER)
1564                return compat_mc_getsockopt(sk, level, optname, optval, optlen,
1565                        ip_getsockopt);
1566
1567        err = do_ip_getsockopt(sk, level, optname, optval, optlen,
1568                MSG_CMSG_COMPAT);
1569
1570#ifdef CONFIG_NETFILTER
1571        /* we need to exclude all possible ENOPROTOOPTs except default case */
1572        if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
1573                        !ip_mroute_opt(optname)) {
1574                int len;
1575
1576                if (get_user(len, optlen))
1577                        return -EFAULT;
1578
1579                lock_sock(sk);
1580                err = compat_nf_getsockopt(sk, PF_INET, optname, optval, &len);
1581                release_sock(sk);
1582                if (err >= 0)
1583                        err = put_user(len, optlen);
1584                return err;
1585        }
1586#endif
1587        return err;
1588}
1589EXPORT_SYMBOL(compat_ip_getsockopt);
1590#endif
1591