linux/net/ipv4/ip_sockglue.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   4 *              operating system.  INET is implemented using the  BSD Socket
   5 *              interface as the means of communication with the user level.
   6 *
   7 *              The IP to API glue.
   8 *
   9 * Authors:     see ip.c
  10 *
  11 * Fixes:
  12 *              Many            :       Split from ip.c , see ip.c for history.
  13 *              Martin Mares    :       TOS setting fixed.
  14 *              Alan Cox        :       Fixed a couple of oopses in Martin's
  15 *                                      TOS tweaks.
  16 *              Mike McLagan    :       Routing by source
  17 */
  18
  19#include <linux/module.h>
  20#include <linux/types.h>
  21#include <linux/mm.h>
  22#include <linux/skbuff.h>
  23#include <linux/ip.h>
  24#include <linux/icmp.h>
  25#include <linux/inetdevice.h>
  26#include <linux/netdevice.h>
  27#include <linux/slab.h>
  28#include <net/sock.h>
  29#include <net/ip.h>
  30#include <net/icmp.h>
  31#include <net/tcp_states.h>
  32#include <linux/udp.h>
  33#include <linux/igmp.h>
  34#include <linux/netfilter.h>
  35#include <linux/route.h>
  36#include <linux/mroute.h>
  37#include <net/inet_ecn.h>
  38#include <net/route.h>
  39#include <net/xfrm.h>
  40#include <net/compat.h>
  41#include <net/checksum.h>
  42#if IS_ENABLED(CONFIG_IPV6)
  43#include <net/transp_v6.h>
  44#endif
  45#include <net/ip_fib.h>
  46
  47#include <linux/errqueue.h>
  48#include <linux/uaccess.h>
  49
  50/*
  51 *      SOL_IP control messages.
  52 */
  53
  54static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
  55{
  56        struct in_pktinfo info = *PKTINFO_SKB_CB(skb);
  57
  58        info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
  59
  60        put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
  61}
  62
  63static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
  64{
  65        int ttl = ip_hdr(skb)->ttl;
  66        put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
  67}
  68
  69static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
  70{
  71        put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos);
  72}
  73
  74static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
  75{
  76        if (IPCB(skb)->opt.optlen == 0)
  77                return;
  78
  79        put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen,
  80                 ip_hdr(skb) + 1);
  81}
  82
  83
  84static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg,
  85                                 struct sk_buff *skb)
  86{
  87        unsigned char optbuf[sizeof(struct ip_options) + 40];
  88        struct ip_options *opt = (struct ip_options *)optbuf;
  89
  90        if (IPCB(skb)->opt.optlen == 0)
  91                return;
  92
  93        if (ip_options_echo(net, opt, skb)) {
  94                msg->msg_flags |= MSG_CTRUNC;
  95                return;
  96        }
  97        ip_options_undo(opt);
  98
  99        put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
 100}
 101
 102static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb)
 103{
 104        int val;
 105
 106        if (IPCB(skb)->frag_max_size == 0)
 107                return;
 108
 109        val = IPCB(skb)->frag_max_size;
 110        put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val);
 111}
 112
 113static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
 114                                  int tlen, int offset)
 115{
 116        __wsum csum = skb->csum;
 117
 118        if (skb->ip_summed != CHECKSUM_COMPLETE)
 119                return;
 120
 121        if (offset != 0) {
 122                int tend_off = skb_transport_offset(skb) + tlen;
 123                csum = csum_sub(csum, skb_checksum(skb, tend_off, offset, 0));
 124        }
 125
 126        put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
 127}
 128
 129static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
 130{
 131        char *secdata;
 132        u32 seclen, secid;
 133        int err;
 134
 135        err = security_socket_getpeersec_dgram(NULL, skb, &secid);
 136        if (err)
 137                return;
 138
 139        err = security_secid_to_secctx(secid, &secdata, &seclen);
 140        if (err)
 141                return;
 142
 143        put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata);
 144        security_release_secctx(secdata, seclen);
 145}
 146
 147static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
 148{
 149        struct sockaddr_in sin;
 150        const struct iphdr *iph = ip_hdr(skb);
 151        __be16 *ports = (__be16 *)skb_transport_header(skb);
 152
 153        if (skb_transport_offset(skb) + 4 > (int)skb->len)
 154                return;
 155
 156        /* All current transport protocols have the port numbers in the
 157         * first four bytes of the transport header and this function is
 158         * written with this assumption in mind.
 159         */
 160
 161        sin.sin_family = AF_INET;
 162        sin.sin_addr.s_addr = iph->daddr;
 163        sin.sin_port = ports[1];
 164        memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
 165
 166        put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
 167}
 168
 169void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
 170                         struct sk_buff *skb, int tlen, int offset)
 171{
 172        struct inet_sock *inet = inet_sk(sk);
 173        unsigned int flags = inet->cmsg_flags;
 174
 175        /* Ordered by supposed usage frequency */
 176        if (flags & IP_CMSG_PKTINFO) {
 177                ip_cmsg_recv_pktinfo(msg, skb);
 178
 179                flags &= ~IP_CMSG_PKTINFO;
 180                if (!flags)
 181                        return;
 182        }
 183
 184        if (flags & IP_CMSG_TTL) {
 185                ip_cmsg_recv_ttl(msg, skb);
 186
 187                flags &= ~IP_CMSG_TTL;
 188                if (!flags)
 189                        return;
 190        }
 191
 192        if (flags & IP_CMSG_TOS) {
 193                ip_cmsg_recv_tos(msg, skb);
 194
 195                flags &= ~IP_CMSG_TOS;
 196                if (!flags)
 197                        return;
 198        }
 199
 200        if (flags & IP_CMSG_RECVOPTS) {
 201                ip_cmsg_recv_opts(msg, skb);
 202
 203                flags &= ~IP_CMSG_RECVOPTS;
 204                if (!flags)
 205                        return;
 206        }
 207
 208        if (flags & IP_CMSG_RETOPTS) {
 209                ip_cmsg_recv_retopts(sock_net(sk), msg, skb);
 210
 211                flags &= ~IP_CMSG_RETOPTS;
 212                if (!flags)
 213                        return;
 214        }
 215
 216        if (flags & IP_CMSG_PASSSEC) {
 217                ip_cmsg_recv_security(msg, skb);
 218
 219                flags &= ~IP_CMSG_PASSSEC;
 220                if (!flags)
 221                        return;
 222        }
 223
 224        if (flags & IP_CMSG_ORIGDSTADDR) {
 225                ip_cmsg_recv_dstaddr(msg, skb);
 226
 227                flags &= ~IP_CMSG_ORIGDSTADDR;
 228                if (!flags)
 229                        return;
 230        }
 231
 232        if (flags & IP_CMSG_CHECKSUM)
 233                ip_cmsg_recv_checksum(msg, skb, tlen, offset);
 234
 235        if (flags & IP_CMSG_RECVFRAGSIZE)
 236                ip_cmsg_recv_fragsize(msg, skb);
 237}
 238EXPORT_SYMBOL(ip_cmsg_recv_offset);
 239
 240int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
 241                 bool allow_ipv6)
 242{
 243        int err, val;
 244        struct cmsghdr *cmsg;
 245        struct net *net = sock_net(sk);
 246
 247        for_each_cmsghdr(cmsg, msg) {
 248                if (!CMSG_OK(msg, cmsg))
 249                        return -EINVAL;
 250#if IS_ENABLED(CONFIG_IPV6)
 251                if (allow_ipv6 &&
 252                    cmsg->cmsg_level == SOL_IPV6 &&
 253                    cmsg->cmsg_type == IPV6_PKTINFO) {
 254                        struct in6_pktinfo *src_info;
 255
 256                        if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info)))
 257                                return -EINVAL;
 258                        src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
 259                        if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
 260                                return -EINVAL;
 261                        ipc->oif = src_info->ipi6_ifindex;
 262                        ipc->addr = src_info->ipi6_addr.s6_addr32[3];
 263                        continue;
 264                }
 265#endif
 266                if (cmsg->cmsg_level == SOL_SOCKET) {
 267                        err = __sock_cmsg_send(sk, msg, cmsg, &ipc->sockc);
 268                        if (err)
 269                                return err;
 270                        continue;
 271                }
 272
 273                if (cmsg->cmsg_level != SOL_IP)
 274                        continue;
 275                switch (cmsg->cmsg_type) {
 276                case IP_RETOPTS:
 277                        err = cmsg->cmsg_len - sizeof(struct cmsghdr);
 278
 279                        /* Our caller is responsible for freeing ipc->opt */
 280                        err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
 281                                             err < 40 ? err : 40);
 282                        if (err)
 283                                return err;
 284                        break;
 285                case IP_PKTINFO:
 286                {
 287                        struct in_pktinfo *info;
 288                        if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
 289                                return -EINVAL;
 290                        info = (struct in_pktinfo *)CMSG_DATA(cmsg);
 291                        ipc->oif = info->ipi_ifindex;
 292                        ipc->addr = info->ipi_spec_dst.s_addr;
 293                        break;
 294                }
 295                case IP_TTL:
 296                        if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
 297                                return -EINVAL;
 298                        val = *(int *)CMSG_DATA(cmsg);
 299                        if (val < 1 || val > 255)
 300                                return -EINVAL;
 301                        ipc->ttl = val;
 302                        break;
 303                case IP_TOS:
 304                        if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
 305                                val = *(int *)CMSG_DATA(cmsg);
 306                        else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
 307                                val = *(u8 *)CMSG_DATA(cmsg);
 308                        else
 309                                return -EINVAL;
 310                        if (val < 0 || val > 255)
 311                                return -EINVAL;
 312                        ipc->tos = val;
 313                        ipc->priority = rt_tos2priority(ipc->tos);
 314                        break;
 315
 316                default:
 317                        return -EINVAL;
 318                }
 319        }
 320        return 0;
 321}
 322
 323
/* Special input handler for packets caught by the router alert option.
 * They are selected only by the protocol field and then processed like
 * local ones, but only if someone wants them! Otherwise, a router
 * not running rsvpd would kill RSVP.
 *
 * What to do with them is a user-level problem.
 * I have no idea how it will masquerade or NAT them (it is a joke, joke :-)),
 * but the receiver should be clever enough, e.g., to forward mtrace requests
 * sent to a multicast group so that they reach the destination designated router.
 */
 334struct ip_ra_chain __rcu *ip_ra_chain;
 335
 336
 337static void ip_ra_destroy_rcu(struct rcu_head *head)
 338{
 339        struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
 340
 341        sock_put(ra->saved_sk);
 342        kfree(ra);
 343}
 344
 345int ip_ra_control(struct sock *sk, unsigned char on,
 346                  void (*destructor)(struct sock *))
 347{
 348        struct ip_ra_chain *ra, *new_ra;
 349        struct ip_ra_chain __rcu **rap;
 350
 351        if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
 352                return -EINVAL;
 353
 354        new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
 355
 356        for (rap = &ip_ra_chain;
 357             (ra = rtnl_dereference(*rap)) != NULL;
 358             rap = &ra->next) {
 359                if (ra->sk == sk) {
 360                        if (on) {
 361                                kfree(new_ra);
 362                                return -EADDRINUSE;
 363                        }
 364                        /* dont let ip_call_ra_chain() use sk again */
 365                        ra->sk = NULL;
 366                        RCU_INIT_POINTER(*rap, ra->next);
 367
 368                        if (ra->destructor)
 369                                ra->destructor(sk);
 370                        /*
 371                         * Delay sock_put(sk) and kfree(ra) after one rcu grace
 372                         * period. This guarantee ip_call_ra_chain() dont need
 373                         * to mess with socket refcounts.
 374                         */
 375                        ra->saved_sk = sk;
 376                        call_rcu(&ra->rcu, ip_ra_destroy_rcu);
 377                        return 0;
 378                }
 379        }
 380        if (!new_ra)
 381                return -ENOBUFS;
 382        new_ra->sk = sk;
 383        new_ra->destructor = destructor;
 384
 385        RCU_INIT_POINTER(new_ra->next, ra);
 386        rcu_assign_pointer(*rap, new_ra);
 387        sock_hold(sk);
 388
 389        return 0;
 390}
 391
 392void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 393                   __be16 port, u32 info, u8 *payload)
 394{
 395        struct sock_exterr_skb *serr;
 396
 397        skb = skb_clone(skb, GFP_ATOMIC);
 398        if (!skb)
 399                return;
 400
 401        serr = SKB_EXT_ERR(skb);
 402        serr->ee.ee_errno = err;
 403        serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
 404        serr->ee.ee_type = icmp_hdr(skb)->type;
 405        serr->ee.ee_code = icmp_hdr(skb)->code;
 406        serr->ee.ee_pad = 0;
 407        serr->ee.ee_info = info;
 408        serr->ee.ee_data = 0;
 409        serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) -
 410                                   skb_network_header(skb);
 411        serr->port = port;
 412
 413        if (skb_pull(skb, payload - skb->data)) {
 414                skb_reset_transport_header(skb);
 415                if (sock_queue_err_skb(sk, skb) == 0)
 416                        return;
 417        }
 418        kfree_skb(skb);
 419}
 420
 421void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
 422{
 423        struct inet_sock *inet = inet_sk(sk);
 424        struct sock_exterr_skb *serr;
 425        struct iphdr *iph;
 426        struct sk_buff *skb;
 427
 428        if (!inet->recverr)
 429                return;
 430
 431        skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC);
 432        if (!skb)
 433                return;
 434
 435        skb_put(skb, sizeof(struct iphdr));
 436        skb_reset_network_header(skb);
 437        iph = ip_hdr(skb);
 438        iph->daddr = daddr;
 439
 440        serr = SKB_EXT_ERR(skb);
 441        serr->ee.ee_errno = err;
 442        serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
 443        serr->ee.ee_type = 0;
 444        serr->ee.ee_code = 0;
 445        serr->ee.ee_pad = 0;
 446        serr->ee.ee_info = info;
 447        serr->ee.ee_data = 0;
 448        serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
 449        serr->port = port;
 450
 451        __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
 452        skb_reset_transport_header(skb);
 453
 454        if (sock_queue_err_skb(sk, skb))
 455                kfree_skb(skb);
 456}
 457
 458/* For some errors we have valid addr_offset even with zero payload and
 459 * zero port. Also, addr_offset should be supported if port is set.
 460 */
 461static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr)
 462{
 463        return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
 464               serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port;
 465}
 466
 467/* IPv4 supports cmsg on all imcp errors and some timestamps
 468 *
 469 * Timestamp code paths do not initialize the fields expected by cmsg:
 470 * the PKTINFO fields in skb->cb[]. Fill those in here.
 471 */
 472static bool ipv4_datagram_support_cmsg(const struct sock *sk,
 473                                       struct sk_buff *skb,
 474                                       int ee_origin)
 475{
 476        struct in_pktinfo *info;
 477
 478        if (ee_origin == SO_EE_ORIGIN_ICMP)
 479                return true;
 480
 481        if (ee_origin == SO_EE_ORIGIN_LOCAL)
 482                return false;
 483
 484        /* Support IP_PKTINFO on tstamp packets if requested, to correlate
 485         * timestamp with egress dev. Not possible for packets without iif
 486         * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
 487         */
 488        info = PKTINFO_SKB_CB(skb);
 489        if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
 490            !info->ipi_ifindex)
 491                return false;
 492
 493        info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
 494        return true;
 495}
 496
 497/*
 498 *      Handle MSG_ERRQUEUE
 499 */
 500int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 501{
 502        struct sock_exterr_skb *serr;
 503        struct sk_buff *skb;
 504        DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
 505        struct {
 506                struct sock_extended_err ee;
 507                struct sockaddr_in       offender;
 508        } errhdr;
 509        int err;
 510        int copied;
 511
 512        WARN_ON_ONCE(sk->sk_family == AF_INET6);
 513
 514        err = -EAGAIN;
 515        skb = sock_dequeue_err_skb(sk);
 516        if (!skb)
 517                goto out;
 518
 519        copied = skb->len;
 520        if (copied > len) {
 521                msg->msg_flags |= MSG_TRUNC;
 522                copied = len;
 523        }
 524        err = skb_copy_datagram_msg(skb, 0, msg, copied);
 525        if (unlikely(err)) {
 526                kfree_skb(skb);
 527                return err;
 528        }
 529        sock_recv_timestamp(msg, sk, skb);
 530
 531        serr = SKB_EXT_ERR(skb);
 532
 533        if (sin && ipv4_datagram_support_addr(serr)) {
 534                sin->sin_family = AF_INET;
 535                sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
 536                                                   serr->addr_offset);
 537                sin->sin_port = serr->port;
 538                memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
 539                *addr_len = sizeof(*sin);
 540        }
 541
 542        memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
 543        sin = &errhdr.offender;
 544        memset(sin, 0, sizeof(*sin));
 545
 546        if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
 547                sin->sin_family = AF_INET;
 548                sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 549                if (inet_sk(sk)->cmsg_flags)
 550                        ip_cmsg_recv(msg, skb);
 551        }
 552
 553        put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr);
 554
 555        /* Now we could try to dump offended packet options */
 556
 557        msg->msg_flags |= MSG_ERRQUEUE;
 558        err = copied;
 559
 560        consume_skb(skb);
 561out:
 562        return err;
 563}
 564
 565
 566/*
 567 *      Socket option code for IP. This is the end of the line after any
 568 *      TCP,UDP etc options on an IP socket.
 569 */
 570static bool setsockopt_needs_rtnl(int optname)
 571{
 572        switch (optname) {
 573        case IP_ADD_MEMBERSHIP:
 574        case IP_ADD_SOURCE_MEMBERSHIP:
 575        case IP_BLOCK_SOURCE:
 576        case IP_DROP_MEMBERSHIP:
 577        case IP_DROP_SOURCE_MEMBERSHIP:
 578        case IP_MSFILTER:
 579        case IP_UNBLOCK_SOURCE:
 580        case MCAST_BLOCK_SOURCE:
 581        case MCAST_MSFILTER:
 582        case MCAST_JOIN_GROUP:
 583        case MCAST_JOIN_SOURCE_GROUP:
 584        case MCAST_LEAVE_GROUP:
 585        case MCAST_LEAVE_SOURCE_GROUP:
 586        case MCAST_UNBLOCK_SOURCE:
 587        case IP_ROUTER_ALERT:
 588                return true;
 589        }
 590        return false;
 591}
 592
 593static int do_ip_setsockopt(struct sock *sk, int level,
 594                            int optname, char __user *optval, unsigned int optlen)
 595{
 596        struct inet_sock *inet = inet_sk(sk);
 597        struct net *net = sock_net(sk);
 598        int val = 0, err;
 599        bool needs_rtnl = setsockopt_needs_rtnl(optname);
 600
 601        switch (optname) {
 602        case IP_PKTINFO:
 603        case IP_RECVTTL:
 604        case IP_RECVOPTS:
 605        case IP_RECVTOS:
 606        case IP_RETOPTS:
 607        case IP_TOS:
 608        case IP_TTL:
 609        case IP_HDRINCL:
 610        case IP_MTU_DISCOVER:
 611        case IP_RECVERR:
 612        case IP_ROUTER_ALERT:
 613        case IP_FREEBIND:
 614        case IP_PASSSEC:
 615        case IP_TRANSPARENT:
 616        case IP_MINTTL:
 617        case IP_NODEFRAG:
 618        case IP_BIND_ADDRESS_NO_PORT:
 619        case IP_UNICAST_IF:
 620        case IP_MULTICAST_TTL:
 621        case IP_MULTICAST_ALL:
 622        case IP_MULTICAST_LOOP:
 623        case IP_RECVORIGDSTADDR:
 624        case IP_CHECKSUM:
 625        case IP_RECVFRAGSIZE:
 626                if (optlen >= sizeof(int)) {
 627                        if (get_user(val, (int __user *) optval))
 628                                return -EFAULT;
 629                } else if (optlen >= sizeof(char)) {
 630                        unsigned char ucval;
 631
 632                        if (get_user(ucval, (unsigned char __user *) optval))
 633                                return -EFAULT;
 634                        val = (int) ucval;
 635                }
 636        }
 637
 638        /* If optlen==0, it is equivalent to val == 0 */
 639
 640        if (ip_mroute_opt(optname))
 641                return ip_mroute_setsockopt(sk, optname, optval, optlen);
 642
 643        err = 0;
 644        if (needs_rtnl)
 645                rtnl_lock();
 646        lock_sock(sk);
 647
 648        switch (optname) {
 649        case IP_OPTIONS:
 650        {
 651                struct ip_options_rcu *old, *opt = NULL;
 652
 653                if (optlen > 40)
 654                        goto e_inval;
 655                err = ip_options_get_from_user(sock_net(sk), &opt,
 656                                               optval, optlen);
 657                if (err)
 658                        break;
 659                old = rcu_dereference_protected(inet->inet_opt,
 660                                                lockdep_sock_is_held(sk));
 661                if (inet->is_icsk) {
 662                        struct inet_connection_sock *icsk = inet_csk(sk);
 663#if IS_ENABLED(CONFIG_IPV6)
 664                        if (sk->sk_family == PF_INET ||
 665                            (!((1 << sk->sk_state) &
 666                               (TCPF_LISTEN | TCPF_CLOSE)) &&
 667                             inet->inet_daddr != LOOPBACK4_IPV6)) {
 668#endif
 669                                if (old)
 670                                        icsk->icsk_ext_hdr_len -= old->opt.optlen;
 671                                if (opt)
 672                                        icsk->icsk_ext_hdr_len += opt->opt.optlen;
 673                                icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 674#if IS_ENABLED(CONFIG_IPV6)
 675                        }
 676#endif
 677                }
 678                rcu_assign_pointer(inet->inet_opt, opt);
 679                if (old)
 680                        kfree_rcu(old, rcu);
 681                break;
 682        }
 683        case IP_PKTINFO:
 684                if (val)
 685                        inet->cmsg_flags |= IP_CMSG_PKTINFO;
 686                else
 687                        inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
 688                break;
 689        case IP_RECVTTL:
 690                if (val)
 691                        inet->cmsg_flags |=  IP_CMSG_TTL;
 692                else
 693                        inet->cmsg_flags &= ~IP_CMSG_TTL;
 694                break;
 695        case IP_RECVTOS:
 696                if (val)
 697                        inet->cmsg_flags |=  IP_CMSG_TOS;
 698                else
 699                        inet->cmsg_flags &= ~IP_CMSG_TOS;
 700                break;
 701        case IP_RECVOPTS:
 702                if (val)
 703                        inet->cmsg_flags |=  IP_CMSG_RECVOPTS;
 704                else
 705                        inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
 706                break;
 707        case IP_RETOPTS:
 708                if (val)
 709                        inet->cmsg_flags |= IP_CMSG_RETOPTS;
 710                else
 711                        inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
 712                break;
 713        case IP_PASSSEC:
 714                if (val)
 715                        inet->cmsg_flags |= IP_CMSG_PASSSEC;
 716                else
 717                        inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
 718                break;
 719        case IP_RECVORIGDSTADDR:
 720                if (val)
 721                        inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR;
 722                else
 723                        inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
 724                break;
 725        case IP_CHECKSUM:
 726                if (val) {
 727                        if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) {
 728                                inet_inc_convert_csum(sk);
 729                                inet->cmsg_flags |= IP_CMSG_CHECKSUM;
 730                        }
 731                } else {
 732                        if (inet->cmsg_flags & IP_CMSG_CHECKSUM) {
 733                                inet_dec_convert_csum(sk);
 734                                inet->cmsg_flags &= ~IP_CMSG_CHECKSUM;
 735                        }
 736                }
 737                break;
 738        case IP_RECVFRAGSIZE:
 739                if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
 740                        goto e_inval;
 741                if (val)
 742                        inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
 743                else
 744                        inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
 745                break;
 746        case IP_TOS:    /* This sets both TOS and Precedence */
 747                if (sk->sk_type == SOCK_STREAM) {
 748                        val &= ~INET_ECN_MASK;
 749                        val |= inet->tos & INET_ECN_MASK;
 750                }
 751                if (inet->tos != val) {
 752                        inet->tos = val;
 753                        sk->sk_priority = rt_tos2priority(val);
 754                        sk_dst_reset(sk);
 755                }
 756                break;
 757        case IP_TTL:
 758                if (optlen < 1)
 759                        goto e_inval;
 760                if (val != -1 && (val < 1 || val > 255))
 761                        goto e_inval;
 762                inet->uc_ttl = val;
 763                break;
 764        case IP_HDRINCL:
 765                if (sk->sk_type != SOCK_RAW) {
 766                        err = -ENOPROTOOPT;
 767                        break;
 768                }
 769                inet->hdrincl = val ? 1 : 0;
 770                break;
 771        case IP_NODEFRAG:
 772                if (sk->sk_type != SOCK_RAW) {
 773                        err = -ENOPROTOOPT;
 774                        break;
 775                }
 776                inet->nodefrag = val ? 1 : 0;
 777                break;
 778        case IP_BIND_ADDRESS_NO_PORT:
 779                inet->bind_address_no_port = val ? 1 : 0;
 780                break;
 781        case IP_MTU_DISCOVER:
 782                if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
 783                        goto e_inval;
 784                inet->pmtudisc = val;
 785                break;
 786        case IP_RECVERR:
 787                inet->recverr = !!val;
 788                if (!val)
 789                        skb_queue_purge(&sk->sk_error_queue);
 790                break;
 791        case IP_MULTICAST_TTL:
 792                if (sk->sk_type == SOCK_STREAM)
 793                        goto e_inval;
 794                if (optlen < 1)
 795                        goto e_inval;
 796                if (val == -1)
 797                        val = 1;
 798                if (val < 0 || val > 255)
 799                        goto e_inval;
 800                inet->mc_ttl = val;
 801                break;
 802        case IP_MULTICAST_LOOP:
 803                if (optlen < 1)
 804                        goto e_inval;
 805                inet->mc_loop = !!val;
 806                break;
 807        case IP_UNICAST_IF:
 808        {
 809                struct net_device *dev = NULL;
 810                int ifindex;
 811
 812                if (optlen != sizeof(int))
 813                        goto e_inval;
 814
 815                ifindex = (__force int)ntohl((__force __be32)val);
 816                if (ifindex == 0) {
 817                        inet->uc_index = 0;
 818                        err = 0;
 819                        break;
 820                }
 821
 822                dev = dev_get_by_index(sock_net(sk), ifindex);
 823                err = -EADDRNOTAVAIL;
 824                if (!dev)
 825                        break;
 826                dev_put(dev);
 827
 828                err = -EINVAL;
 829                if (sk->sk_bound_dev_if)
 830                        break;
 831
 832                inet->uc_index = ifindex;
 833                err = 0;
 834                break;
 835        }
 836        case IP_MULTICAST_IF:
 837        {
 838                struct ip_mreqn mreq;
 839                struct net_device *dev = NULL;
 840                int midx;
 841
 842                if (sk->sk_type == SOCK_STREAM)
 843                        goto e_inval;
 844                /*
 845                 *      Check the arguments are allowable
 846                 */
 847
 848                if (optlen < sizeof(struct in_addr))
 849                        goto e_inval;
 850
 851                err = -EFAULT;
 852                if (optlen >= sizeof(struct ip_mreqn)) {
 853                        if (copy_from_user(&mreq, optval, sizeof(mreq)))
 854                                break;
 855                } else {
 856                        memset(&mreq, 0, sizeof(mreq));
 857                        if (optlen >= sizeof(struct ip_mreq)) {
 858                                if (copy_from_user(&mreq, optval,
 859                                                   sizeof(struct ip_mreq)))
 860                                        break;
 861                        } else if (optlen >= sizeof(struct in_addr)) {
 862                                if (copy_from_user(&mreq.imr_address, optval,
 863                                                   sizeof(struct in_addr)))
 864                                        break;
 865                        }
 866                }
 867
 868                if (!mreq.imr_ifindex) {
 869                        if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) {
 870                                inet->mc_index = 0;
 871                                inet->mc_addr  = 0;
 872                                err = 0;
 873                                break;
 874                        }
 875                        dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
 876                        if (dev)
 877                                mreq.imr_ifindex = dev->ifindex;
 878                } else
 879                        dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
 880
 881
 882                err = -EADDRNOTAVAIL;
 883                if (!dev)
 884                        break;
 885
 886                midx = l3mdev_master_ifindex(dev);
 887
 888                dev_put(dev);
 889
 890                err = -EINVAL;
 891                if (sk->sk_bound_dev_if &&
 892                    mreq.imr_ifindex != sk->sk_bound_dev_if &&
 893                    (!midx || midx != sk->sk_bound_dev_if))
 894                        break;
 895
 896                inet->mc_index = mreq.imr_ifindex;
 897                inet->mc_addr  = mreq.imr_address.s_addr;
 898                err = 0;
 899                break;
 900        }
 901
 902        case IP_ADD_MEMBERSHIP:
 903        case IP_DROP_MEMBERSHIP:
 904        {
 905                struct ip_mreqn mreq;
 906
 907                err = -EPROTO;
 908                if (inet_sk(sk)->is_icsk)
 909                        break;
 910
 911                if (optlen < sizeof(struct ip_mreq))
 912                        goto e_inval;
 913                err = -EFAULT;
 914                if (optlen >= sizeof(struct ip_mreqn)) {
 915                        if (copy_from_user(&mreq, optval, sizeof(mreq)))
 916                                break;
 917                } else {
 918                        memset(&mreq, 0, sizeof(mreq));
 919                        if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq)))
 920                                break;
 921                }
 922
 923                if (optname == IP_ADD_MEMBERSHIP)
 924                        err = ip_mc_join_group(sk, &mreq);
 925                else
 926                        err = ip_mc_leave_group(sk, &mreq);
 927                break;
 928        }
 929        case IP_MSFILTER:
 930        {
 931                struct ip_msfilter *msf;
 932
 933                if (optlen < IP_MSFILTER_SIZE(0))
 934                        goto e_inval;
 935                if (optlen > sysctl_optmem_max) {
 936                        err = -ENOBUFS;
 937                        break;
 938                }
 939                msf = memdup_user(optval, optlen);
 940                if (IS_ERR(msf)) {
 941                        err = PTR_ERR(msf);
 942                        break;
 943                }
 944                /* numsrc >= (1G-4) overflow in 32 bits */
 945                if (msf->imsf_numsrc >= 0x3ffffffcU ||
 946                    msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
 947                        kfree(msf);
 948                        err = -ENOBUFS;
 949                        break;
 950                }
 951                if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
 952                        kfree(msf);
 953                        err = -EINVAL;
 954                        break;
 955                }
 956                err = ip_mc_msfilter(sk, msf, 0);
 957                kfree(msf);
 958                break;
 959        }
 960        case IP_BLOCK_SOURCE:
 961        case IP_UNBLOCK_SOURCE:
 962        case IP_ADD_SOURCE_MEMBERSHIP:
 963        case IP_DROP_SOURCE_MEMBERSHIP:
 964        {
 965                struct ip_mreq_source mreqs;
 966                int omode, add;
 967
 968                if (optlen != sizeof(struct ip_mreq_source))
 969                        goto e_inval;
 970                if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
 971                        err = -EFAULT;
 972                        break;
 973                }
 974                if (optname == IP_BLOCK_SOURCE) {
 975                        omode = MCAST_EXCLUDE;
 976                        add = 1;
 977                } else if (optname == IP_UNBLOCK_SOURCE) {
 978                        omode = MCAST_EXCLUDE;
 979                        add = 0;
 980                } else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
 981                        struct ip_mreqn mreq;
 982
 983                        mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
 984                        mreq.imr_address.s_addr = mreqs.imr_interface;
 985                        mreq.imr_ifindex = 0;
 986                        err = ip_mc_join_group(sk, &mreq);
 987                        if (err && err != -EADDRINUSE)
 988                                break;
 989                        omode = MCAST_INCLUDE;
 990                        add = 1;
 991                } else /* IP_DROP_SOURCE_MEMBERSHIP */ {
 992                        omode = MCAST_INCLUDE;
 993                        add = 0;
 994                }
 995                err = ip_mc_source(add, omode, sk, &mreqs, 0);
 996                break;
 997        }
 998        case MCAST_JOIN_GROUP:
 999        case MCAST_LEAVE_GROUP:
1000        {
1001                struct group_req greq;
1002                struct sockaddr_in *psin;
1003                struct ip_mreqn mreq;
1004
1005                if (optlen < sizeof(struct group_req))
1006                        goto e_inval;
1007                err = -EFAULT;
1008                if (copy_from_user(&greq, optval, sizeof(greq)))
1009                        break;
1010                psin = (struct sockaddr_in *)&greq.gr_group;
1011                if (psin->sin_family != AF_INET)
1012                        goto e_inval;
1013                memset(&mreq, 0, sizeof(mreq));
1014                mreq.imr_multiaddr = psin->sin_addr;
1015                mreq.imr_ifindex = greq.gr_interface;
1016
1017                if (optname == MCAST_JOIN_GROUP)
1018                        err = ip_mc_join_group(sk, &mreq);
1019                else
1020                        err = ip_mc_leave_group(sk, &mreq);
1021                break;
1022        }
1023        case MCAST_JOIN_SOURCE_GROUP:
1024        case MCAST_LEAVE_SOURCE_GROUP:
1025        case MCAST_BLOCK_SOURCE:
1026        case MCAST_UNBLOCK_SOURCE:
1027        {
1028                struct group_source_req greqs;
1029                struct ip_mreq_source mreqs;
1030                struct sockaddr_in *psin;
1031                int omode, add;
1032
1033                if (optlen != sizeof(struct group_source_req))
1034                        goto e_inval;
1035                if (copy_from_user(&greqs, optval, sizeof(greqs))) {
1036                        err = -EFAULT;
1037                        break;
1038                }
1039                if (greqs.gsr_group.ss_family != AF_INET ||
1040                    greqs.gsr_source.ss_family != AF_INET) {
1041                        err = -EADDRNOTAVAIL;
1042                        break;
1043                }
1044                psin = (struct sockaddr_in *)&greqs.gsr_group;
1045                mreqs.imr_multiaddr = psin->sin_addr.s_addr;
1046                psin = (struct sockaddr_in *)&greqs.gsr_source;
1047                mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
1048                mreqs.imr_interface = 0; /* use index for mc_source */
1049
1050                if (optname == MCAST_BLOCK_SOURCE) {
1051                        omode = MCAST_EXCLUDE;
1052                        add = 1;
1053                } else if (optname == MCAST_UNBLOCK_SOURCE) {
1054                        omode = MCAST_EXCLUDE;
1055                        add = 0;
1056                } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
1057                        struct ip_mreqn mreq;
1058
1059                        psin = (struct sockaddr_in *)&greqs.gsr_group;
1060                        mreq.imr_multiaddr = psin->sin_addr;
1061                        mreq.imr_address.s_addr = 0;
1062                        mreq.imr_ifindex = greqs.gsr_interface;
1063                        err = ip_mc_join_group(sk, &mreq);
1064                        if (err && err != -EADDRINUSE)
1065                                break;
1066                        greqs.gsr_interface = mreq.imr_ifindex;
1067                        omode = MCAST_INCLUDE;
1068                        add = 1;
1069                } else /* MCAST_LEAVE_SOURCE_GROUP */ {
1070                        omode = MCAST_INCLUDE;
1071                        add = 0;
1072                }
1073                err = ip_mc_source(add, omode, sk, &mreqs,
1074                                   greqs.gsr_interface);
1075                break;
1076        }
1077        case MCAST_MSFILTER:
1078        {
1079                struct sockaddr_in *psin;
1080                struct ip_msfilter *msf = NULL;
1081                struct group_filter *gsf = NULL;
1082                int msize, i, ifindex;
1083
1084                if (optlen < GROUP_FILTER_SIZE(0))
1085                        goto e_inval;
1086                if (optlen > sysctl_optmem_max) {
1087                        err = -ENOBUFS;
1088                        break;
1089                }
1090                gsf = memdup_user(optval, optlen);
1091                if (IS_ERR(gsf)) {
1092                        err = PTR_ERR(gsf);
1093                        break;
1094                }
1095
1096                /* numsrc >= (4G-140)/128 overflow in 32 bits */
1097                if (gsf->gf_numsrc >= 0x1ffffff ||
1098                    gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
1099                        err = -ENOBUFS;
1100                        goto mc_msf_out;
1101                }
1102                if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
1103                        err = -EINVAL;
1104                        goto mc_msf_out;
1105                }
1106                msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
1107                msf = kmalloc(msize, GFP_KERNEL);
1108                if (!msf) {
1109                        err = -ENOBUFS;
1110                        goto mc_msf_out;
1111                }
1112                ifindex = gsf->gf_interface;
1113                psin = (struct sockaddr_in *)&gsf->gf_group;
1114                if (psin->sin_family != AF_INET) {
1115                        err = -EADDRNOTAVAIL;
1116                        goto mc_msf_out;
1117                }
1118                msf->imsf_multiaddr = psin->sin_addr.s_addr;
1119                msf->imsf_interface = 0;
1120                msf->imsf_fmode = gsf->gf_fmode;
1121                msf->imsf_numsrc = gsf->gf_numsrc;
1122                err = -EADDRNOTAVAIL;
1123                for (i = 0; i < gsf->gf_numsrc; ++i) {
1124                        psin = (struct sockaddr_in *)&gsf->gf_slist[i];
1125
1126                        if (psin->sin_family != AF_INET)
1127                                goto mc_msf_out;
1128                        msf->imsf_slist[i] = psin->sin_addr.s_addr;
1129                }
1130                kfree(gsf);
1131                gsf = NULL;
1132
1133                err = ip_mc_msfilter(sk, msf, ifindex);
1134mc_msf_out:
1135                kfree(msf);
1136                kfree(gsf);
1137                break;
1138        }
1139        case IP_MULTICAST_ALL:
1140                if (optlen < 1)
1141                        goto e_inval;
1142                if (val != 0 && val != 1)
1143                        goto e_inval;
1144                inet->mc_all = val;
1145                break;
1146        case IP_ROUTER_ALERT:
1147                err = ip_ra_control(sk, val ? 1 : 0, NULL);
1148                break;
1149
1150        case IP_FREEBIND:
1151                if (optlen < 1)
1152                        goto e_inval;
1153                inet->freebind = !!val;
1154                break;
1155
1156        case IP_IPSEC_POLICY:
1157        case IP_XFRM_POLICY:
1158                err = -EPERM;
1159                if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1160                        break;
1161                err = xfrm_user_policy(sk, optname, optval, optlen);
1162                break;
1163
1164        case IP_TRANSPARENT:
1165                if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
1166                    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1167                        err = -EPERM;
1168                        break;
1169                }
1170                if (optlen < 1)
1171                        goto e_inval;
1172                inet->transparent = !!val;
1173                break;
1174
1175        case IP_MINTTL:
1176                if (optlen < 1)
1177                        goto e_inval;
1178                if (val < 0 || val > 255)
1179                        goto e_inval;
1180                inet->min_ttl = val;
1181                break;
1182
1183        default:
1184                err = -ENOPROTOOPT;
1185                break;
1186        }
1187        release_sock(sk);
1188        if (needs_rtnl)
1189                rtnl_unlock();
1190        return err;
1191
1192e_inval:
1193        release_sock(sk);
1194        if (needs_rtnl)
1195                rtnl_unlock();
1196        return -EINVAL;
1197}
1198
1199/**
1200 * ipv4_pktinfo_prepare - transfer some info from rtable to skb
1201 * @sk: socket
1202 * @skb: buffer
1203 *
1204 * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
1205 * destination in skb->cb[] before dst drop.
1206 * This way, receiver doesn't make cache line misses to read rtable.
1207 */
1208void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
1209{
1210        struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
1211        bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
1212                       ipv6_sk_rxinfo(sk);
1213
1214        if (prepare && skb_rtable(skb)) {
1215                /* skb->cb is overloaded: prior to this point it is IP{6}CB
1216                 * which has interface index (iif) as the first member of the
1217                 * underlying inet{6}_skb_parm struct. This code then overlays
1218                 * PKTINFO_SKB_CB and in_pktinfo also has iif as the first
1219                 * element so the iif is picked up from the prior IPCB. If iif
1220                 * is the loopback interface, then return the sending interface
1221                 * (e.g., process binds socket to eth0 for Tx which is
1222                 * redirected to loopback in the rtable/dst).
1223                 */
1224                struct rtable *rt = skb_rtable(skb);
1225                bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);
1226
1227                if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
1228                        pktinfo->ipi_ifindex = inet_iif(skb);
1229                else if (l3slave && rt && rt->rt_iif)
1230                        pktinfo->ipi_ifindex = rt->rt_iif;
1231
1232                pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
1233        } else {
1234                pktinfo->ipi_ifindex = 0;
1235                pktinfo->ipi_spec_dst.s_addr = 0;
1236        }
1237        skb_dst_drop(skb);
1238}
1239
1240int ip_setsockopt(struct sock *sk, int level,
1241                int optname, char __user *optval, unsigned int optlen)
1242{
1243        int err;
1244
1245        if (level != SOL_IP)
1246                return -ENOPROTOOPT;
1247
1248        err = do_ip_setsockopt(sk, level, optname, optval, optlen);
1249#ifdef CONFIG_NETFILTER
1250        /* we need to exclude all possible ENOPROTOOPTs except default case */
1251        if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
1252                        optname != IP_IPSEC_POLICY &&
1253                        optname != IP_XFRM_POLICY &&
1254                        !ip_mroute_opt(optname)) {
1255                lock_sock(sk);
1256                err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
1257                release_sock(sk);
1258        }
1259#endif
1260        return err;
1261}
1262EXPORT_SYMBOL(ip_setsockopt);
1263
1264#ifdef CONFIG_COMPAT
1265int compat_ip_setsockopt(struct sock *sk, int level, int optname,
1266                         char __user *optval, unsigned int optlen)
1267{
1268        int err;
1269
1270        if (level != SOL_IP)
1271                return -ENOPROTOOPT;
1272
1273        if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER)
1274                return compat_mc_setsockopt(sk, level, optname, optval, optlen,
1275                        ip_setsockopt);
1276
1277        err = do_ip_setsockopt(sk, level, optname, optval, optlen);
1278#ifdef CONFIG_NETFILTER
1279        /* we need to exclude all possible ENOPROTOOPTs except default case */
1280        if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
1281                        optname != IP_IPSEC_POLICY &&
1282                        optname != IP_XFRM_POLICY &&
1283                        !ip_mroute_opt(optname)) {
1284                lock_sock(sk);
1285                err = compat_nf_setsockopt(sk, PF_INET, optname,
1286                                           optval, optlen);
1287                release_sock(sk);
1288        }
1289#endif
1290        return err;
1291}
1292EXPORT_SYMBOL(compat_ip_setsockopt);
1293#endif
1294
1295/*
1296 *      Get the options. Note for future reference. The GET of IP options gets
1297 *      the _received_ ones. The set sets the _sent_ ones.
1298 */
1299
/*
 *	Report whether reading @optname requires the RTNL lock: true for the
 *	two multicast source-filter options, false for everything else.
 */
static bool getsockopt_needs_rtnl(int optname)
{
	return optname == IP_MSFILTER || optname == MCAST_MSFILTER;
}
1309
1310static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1311                            char __user *optval, int __user *optlen, unsigned int flags)
1312{
1313        struct inet_sock *inet = inet_sk(sk);
1314        bool needs_rtnl = getsockopt_needs_rtnl(optname);
1315        int val, err = 0;
1316        int len;
1317
1318        if (level != SOL_IP)
1319                return -EOPNOTSUPP;
1320
1321        if (ip_mroute_opt(optname))
1322                return ip_mroute_getsockopt(sk, optname, optval, optlen);
1323
1324        if (get_user(len, optlen))
1325                return -EFAULT;
1326        if (len < 0)
1327                return -EINVAL;
1328
1329        if (needs_rtnl)
1330                rtnl_lock();
1331        lock_sock(sk);
1332
1333        switch (optname) {
1334        case IP_OPTIONS:
1335        {
1336                unsigned char optbuf[sizeof(struct ip_options)+40];
1337                struct ip_options *opt = (struct ip_options *)optbuf;
1338                struct ip_options_rcu *inet_opt;
1339
1340                inet_opt = rcu_dereference_protected(inet->inet_opt,
1341                                                     lockdep_sock_is_held(sk));
1342                opt->optlen = 0;
1343                if (inet_opt)
1344                        memcpy(optbuf, &inet_opt->opt,
1345                               sizeof(struct ip_options) +
1346                               inet_opt->opt.optlen);
1347                release_sock(sk);
1348
1349                if (opt->optlen == 0)
1350                        return put_user(0, optlen);
1351
1352                ip_options_undo(opt);
1353
1354                len = min_t(unsigned int, len, opt->optlen);
1355                if (put_user(len, optlen))
1356                        return -EFAULT;
1357                if (copy_to_user(optval, opt->__data, len))
1358                        return -EFAULT;
1359                return 0;
1360        }
1361        case IP_PKTINFO:
1362                val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
1363                break;
1364        case IP_RECVTTL:
1365                val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
1366                break;
1367        case IP_RECVTOS:
1368                val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
1369                break;
1370        case IP_RECVOPTS:
1371                val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
1372                break;
1373        case IP_RETOPTS:
1374                val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
1375                break;
1376        case IP_PASSSEC:
1377                val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
1378                break;
1379        case IP_RECVORIGDSTADDR:
1380                val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0;
1381                break;
1382        case IP_CHECKSUM:
1383                val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
1384                break;
1385        case IP_RECVFRAGSIZE:
1386                val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0;
1387                break;
1388        case IP_TOS:
1389                val = inet->tos;
1390                break;
1391        case IP_TTL:
1392        {
1393                struct net *net = sock_net(sk);
1394                val = (inet->uc_ttl == -1 ?
1395                       net->ipv4.sysctl_ip_default_ttl :
1396                       inet->uc_ttl);
1397                break;
1398        }
1399        case IP_HDRINCL:
1400                val = inet->hdrincl;
1401                break;
1402        case IP_NODEFRAG:
1403                val = inet->nodefrag;
1404                break;
1405        case IP_BIND_ADDRESS_NO_PORT:
1406                val = inet->bind_address_no_port;
1407                break;
1408        case IP_MTU_DISCOVER:
1409                val = inet->pmtudisc;
1410                break;
1411        case IP_MTU:
1412        {
1413                struct dst_entry *dst;
1414                val = 0;
1415                dst = sk_dst_get(sk);
1416                if (dst) {
1417                        val = dst_mtu(dst);
1418                        dst_release(dst);
1419                }
1420                if (!val) {
1421                        release_sock(sk);
1422                        return -ENOTCONN;
1423                }
1424                break;
1425        }
1426        case IP_RECVERR:
1427                val = inet->recverr;
1428                break;
1429        case IP_MULTICAST_TTL:
1430                val = inet->mc_ttl;
1431                break;
1432        case IP_MULTICAST_LOOP:
1433                val = inet->mc_loop;
1434                break;
1435        case IP_UNICAST_IF:
1436                val = (__force int)htonl((__u32) inet->uc_index);
1437                break;
1438        case IP_MULTICAST_IF:
1439        {
1440                struct in_addr addr;
1441                len = min_t(unsigned int, len, sizeof(struct in_addr));
1442                addr.s_addr = inet->mc_addr;
1443                release_sock(sk);
1444
1445                if (put_user(len, optlen))
1446                        return -EFAULT;
1447                if (copy_to_user(optval, &addr, len))
1448                        return -EFAULT;
1449                return 0;
1450        }
1451        case IP_MSFILTER:
1452        {
1453                struct ip_msfilter msf;
1454
1455                if (len < IP_MSFILTER_SIZE(0)) {
1456                        err = -EINVAL;
1457                        goto out;
1458                }
1459                if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
1460                        err = -EFAULT;
1461                        goto out;
1462                }
1463                err = ip_mc_msfget(sk, &msf,
1464                                   (struct ip_msfilter __user *)optval, optlen);
1465                goto out;
1466        }
1467        case MCAST_MSFILTER:
1468        {
1469                struct group_filter gsf;
1470
1471                if (len < GROUP_FILTER_SIZE(0)) {
1472                        err = -EINVAL;
1473                        goto out;
1474                }
1475                if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
1476                        err = -EFAULT;
1477                        goto out;
1478                }
1479                err = ip_mc_gsfget(sk, &gsf,
1480                                   (struct group_filter __user *)optval,
1481                                   optlen);
1482                goto out;
1483        }
1484        case IP_MULTICAST_ALL:
1485                val = inet->mc_all;
1486                break;
1487        case IP_PKTOPTIONS:
1488        {
1489                struct msghdr msg;
1490
1491                release_sock(sk);
1492
1493                if (sk->sk_type != SOCK_STREAM)
1494                        return -ENOPROTOOPT;
1495
1496                msg.msg_control = (__force void *) optval;
1497                msg.msg_controllen = len;
1498                msg.msg_flags = flags;
1499
1500                if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
1501                        struct in_pktinfo info;
1502
1503                        info.ipi_addr.s_addr = inet->inet_rcv_saddr;
1504                        info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr;
1505                        info.ipi_ifindex = inet->mc_index;
1506                        put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
1507                }
1508                if (inet->cmsg_flags & IP_CMSG_TTL) {
1509                        int hlim = inet->mc_ttl;
1510                        put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
1511                }
1512                if (inet->cmsg_flags & IP_CMSG_TOS) {
1513                        int tos = inet->rcv_tos;
1514                        put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
1515                }
1516                len -= msg.msg_controllen;
1517                return put_user(len, optlen);
1518        }
1519        case IP_FREEBIND:
1520                val = inet->freebind;
1521                break;
1522        case IP_TRANSPARENT:
1523                val = inet->transparent;
1524                break;
1525        case IP_MINTTL:
1526                val = inet->min_ttl;
1527                break;
1528        default:
1529                release_sock(sk);
1530                return -ENOPROTOOPT;
1531        }
1532        release_sock(sk);
1533
1534        if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
1535                unsigned char ucval = (unsigned char)val;
1536                len = 1;
1537                if (put_user(len, optlen))
1538                        return -EFAULT;
1539                if (copy_to_user(optval, &ucval, 1))
1540                        return -EFAULT;
1541        } else {
1542                len = min_t(unsigned int, sizeof(int), len);
1543                if (put_user(len, optlen))
1544                        return -EFAULT;
1545                if (copy_to_user(optval, &val, len))
1546                        return -EFAULT;
1547        }
1548        return 0;
1549
1550out:
1551        release_sock(sk);
1552        if (needs_rtnl)
1553                rtnl_unlock();
1554        return err;
1555}
1556
1557int ip_getsockopt(struct sock *sk, int level,
1558                  int optname, char __user *optval, int __user *optlen)
1559{
1560        int err;
1561
1562        err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
1563#ifdef CONFIG_NETFILTER
1564        /* we need to exclude all possible ENOPROTOOPTs except default case */
1565        if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
1566                        !ip_mroute_opt(optname)) {
1567                int len;
1568
1569                if (get_user(len, optlen))
1570                        return -EFAULT;
1571
1572                lock_sock(sk);
1573                err = nf_getsockopt(sk, PF_INET, optname, optval,
1574                                &len);
1575                release_sock(sk);
1576                if (err >= 0)
1577                        err = put_user(len, optlen);
1578                return err;
1579        }
1580#endif
1581        return err;
1582}
1583EXPORT_SYMBOL(ip_getsockopt);
1584
1585#ifdef CONFIG_COMPAT
1586int compat_ip_getsockopt(struct sock *sk, int level, int optname,
1587                         char __user *optval, int __user *optlen)
1588{
1589        int err;
1590
1591        if (optname == MCAST_MSFILTER)
1592                return compat_mc_getsockopt(sk, level, optname, optval, optlen,
1593                        ip_getsockopt);
1594
1595        err = do_ip_getsockopt(sk, level, optname, optval, optlen,
1596                MSG_CMSG_COMPAT);
1597
1598#ifdef CONFIG_NETFILTER
1599        /* we need to exclude all possible ENOPROTOOPTs except default case */
1600        if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
1601                        !ip_mroute_opt(optname)) {
1602                int len;
1603
1604                if (get_user(len, optlen))
1605                        return -EFAULT;
1606
1607                lock_sock(sk);
1608                err = compat_nf_getsockopt(sk, PF_INET, optname, optval, &len);
1609                release_sock(sk);
1610                if (err >= 0)
1611                        err = put_user(len, optlen);
1612                return err;
1613        }
1614#endif
1615        return err;
1616}
1617EXPORT_SYMBOL(compat_ip_getsockopt);
1618#endif
1619