linux/net/ipv4/ip_sockglue.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   4 *              operating system.  INET is implemented using the  BSD Socket
   5 *              interface as the means of communication with the user level.
   6 *
   7 *              The IP to API glue.
   8 *
   9 * Authors:     see ip.c
  10 *
  11 * Fixes:
  12 *              Many            :       Split from ip.c , see ip.c for history.
  13 *              Martin Mares    :       TOS setting fixed.
  14 *              Alan Cox        :       Fixed a couple of oopses in Martin's
  15 *                                      TOS tweaks.
  16 *              Mike McLagan    :       Routing by source
  17 */
  18
  19#include <linux/module.h>
  20#include <linux/types.h>
  21#include <linux/mm.h>
  22#include <linux/skbuff.h>
  23#include <linux/ip.h>
  24#include <linux/icmp.h>
  25#include <linux/inetdevice.h>
  26#include <linux/netdevice.h>
  27#include <linux/slab.h>
  28#include <net/sock.h>
  29#include <net/ip.h>
  30#include <net/icmp.h>
  31#include <net/tcp_states.h>
  32#include <linux/udp.h>
  33#include <linux/igmp.h>
  34#include <linux/netfilter.h>
  35#include <linux/route.h>
  36#include <linux/mroute.h>
  37#include <net/inet_ecn.h>
  38#include <net/route.h>
  39#include <net/xfrm.h>
  40#include <net/compat.h>
  41#include <net/checksum.h>
  42#if IS_ENABLED(CONFIG_IPV6)
  43#include <net/transp_v6.h>
  44#endif
  45#include <net/ip_fib.h>
  46
  47#include <linux/errqueue.h>
  48#include <linux/uaccess.h>
  49
  50#include <linux/bpfilter.h>
  51
  52/*
  53 *      SOL_IP control messages.
  54 */
  55
  56static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
  57{
  58        struct in_pktinfo info = *PKTINFO_SKB_CB(skb);
  59
  60        info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
  61
  62        put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
  63}
  64
  65static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
  66{
  67        int ttl = ip_hdr(skb)->ttl;
  68        put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
  69}
  70
  71static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
  72{
  73        put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos);
  74}
  75
  76static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
  77{
  78        if (IPCB(skb)->opt.optlen == 0)
  79                return;
  80
  81        put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen,
  82                 ip_hdr(skb) + 1);
  83}
  84
  85
  86static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg,
  87                                 struct sk_buff *skb)
  88{
  89        unsigned char optbuf[sizeof(struct ip_options) + 40];
  90        struct ip_options *opt = (struct ip_options *)optbuf;
  91
  92        if (IPCB(skb)->opt.optlen == 0)
  93                return;
  94
  95        if (ip_options_echo(net, opt, skb)) {
  96                msg->msg_flags |= MSG_CTRUNC;
  97                return;
  98        }
  99        ip_options_undo(opt);
 100
 101        put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
 102}
 103
 104static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb)
 105{
 106        int val;
 107
 108        if (IPCB(skb)->frag_max_size == 0)
 109                return;
 110
 111        val = IPCB(skb)->frag_max_size;
 112        put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val);
 113}
 114
 115static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
 116                                  int tlen, int offset)
 117{
 118        __wsum csum = skb->csum;
 119
 120        if (skb->ip_summed != CHECKSUM_COMPLETE)
 121                return;
 122
 123        if (offset != 0) {
 124                int tend_off = skb_transport_offset(skb) + tlen;
 125                csum = csum_sub(csum, skb_checksum(skb, tend_off, offset, 0));
 126        }
 127
 128        put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
 129}
 130
 131static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
 132{
 133        char *secdata;
 134        u32 seclen, secid;
 135        int err;
 136
 137        err = security_socket_getpeersec_dgram(NULL, skb, &secid);
 138        if (err)
 139                return;
 140
 141        err = security_secid_to_secctx(secid, &secdata, &seclen);
 142        if (err)
 143                return;
 144
 145        put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata);
 146        security_release_secctx(secdata, seclen);
 147}
 148
 149static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
 150{
 151        __be16 _ports[2], *ports;
 152        struct sockaddr_in sin;
 153
 154        /* All current transport protocols have the port numbers in the
 155         * first four bytes of the transport header and this function is
 156         * written with this assumption in mind.
 157         */
 158        ports = skb_header_pointer(skb, skb_transport_offset(skb),
 159                                   sizeof(_ports), &_ports);
 160        if (!ports)
 161                return;
 162
 163        sin.sin_family = AF_INET;
 164        sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
 165        sin.sin_port = ports[1];
 166        memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
 167
 168        put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
 169}
 170
 171void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
 172                         struct sk_buff *skb, int tlen, int offset)
 173{
 174        struct inet_sock *inet = inet_sk(sk);
 175        unsigned int flags = inet->cmsg_flags;
 176
 177        /* Ordered by supposed usage frequency */
 178        if (flags & IP_CMSG_PKTINFO) {
 179                ip_cmsg_recv_pktinfo(msg, skb);
 180
 181                flags &= ~IP_CMSG_PKTINFO;
 182                if (!flags)
 183                        return;
 184        }
 185
 186        if (flags & IP_CMSG_TTL) {
 187                ip_cmsg_recv_ttl(msg, skb);
 188
 189                flags &= ~IP_CMSG_TTL;
 190                if (!flags)
 191                        return;
 192        }
 193
 194        if (flags & IP_CMSG_TOS) {
 195                ip_cmsg_recv_tos(msg, skb);
 196
 197                flags &= ~IP_CMSG_TOS;
 198                if (!flags)
 199                        return;
 200        }
 201
 202        if (flags & IP_CMSG_RECVOPTS) {
 203                ip_cmsg_recv_opts(msg, skb);
 204
 205                flags &= ~IP_CMSG_RECVOPTS;
 206                if (!flags)
 207                        return;
 208        }
 209
 210        if (flags & IP_CMSG_RETOPTS) {
 211                ip_cmsg_recv_retopts(sock_net(sk), msg, skb);
 212
 213                flags &= ~IP_CMSG_RETOPTS;
 214                if (!flags)
 215                        return;
 216        }
 217
 218        if (flags & IP_CMSG_PASSSEC) {
 219                ip_cmsg_recv_security(msg, skb);
 220
 221                flags &= ~IP_CMSG_PASSSEC;
 222                if (!flags)
 223                        return;
 224        }
 225
 226        if (flags & IP_CMSG_ORIGDSTADDR) {
 227                ip_cmsg_recv_dstaddr(msg, skb);
 228
 229                flags &= ~IP_CMSG_ORIGDSTADDR;
 230                if (!flags)
 231                        return;
 232        }
 233
 234        if (flags & IP_CMSG_CHECKSUM)
 235                ip_cmsg_recv_checksum(msg, skb, tlen, offset);
 236
 237        if (flags & IP_CMSG_RECVFRAGSIZE)
 238                ip_cmsg_recv_fragsize(msg, skb);
 239}
 240EXPORT_SYMBOL(ip_cmsg_recv_offset);
 241
 242int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
 243                 bool allow_ipv6)
 244{
 245        int err, val;
 246        struct cmsghdr *cmsg;
 247        struct net *net = sock_net(sk);
 248
 249        for_each_cmsghdr(cmsg, msg) {
 250                if (!CMSG_OK(msg, cmsg))
 251                        return -EINVAL;
 252#if IS_ENABLED(CONFIG_IPV6)
 253                if (allow_ipv6 &&
 254                    cmsg->cmsg_level == SOL_IPV6 &&
 255                    cmsg->cmsg_type == IPV6_PKTINFO) {
 256                        struct in6_pktinfo *src_info;
 257
 258                        if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info)))
 259                                return -EINVAL;
 260                        src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
 261                        if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
 262                                return -EINVAL;
 263                        if (src_info->ipi6_ifindex)
 264                                ipc->oif = src_info->ipi6_ifindex;
 265                        ipc->addr = src_info->ipi6_addr.s6_addr32[3];
 266                        continue;
 267                }
 268#endif
 269                if (cmsg->cmsg_level == SOL_SOCKET) {
 270                        err = __sock_cmsg_send(sk, msg, cmsg, &ipc->sockc);
 271                        if (err)
 272                                return err;
 273                        continue;
 274                }
 275
 276                if (cmsg->cmsg_level != SOL_IP)
 277                        continue;
 278                switch (cmsg->cmsg_type) {
 279                case IP_RETOPTS:
 280                        err = cmsg->cmsg_len - sizeof(struct cmsghdr);
 281
 282                        /* Our caller is responsible for freeing ipc->opt */
 283                        err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
 284                                             err < 40 ? err : 40);
 285                        if (err)
 286                                return err;
 287                        break;
 288                case IP_PKTINFO:
 289                {
 290                        struct in_pktinfo *info;
 291                        if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
 292                                return -EINVAL;
 293                        info = (struct in_pktinfo *)CMSG_DATA(cmsg);
 294                        if (info->ipi_ifindex)
 295                                ipc->oif = info->ipi_ifindex;
 296                        ipc->addr = info->ipi_spec_dst.s_addr;
 297                        break;
 298                }
 299                case IP_TTL:
 300                        if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
 301                                return -EINVAL;
 302                        val = *(int *)CMSG_DATA(cmsg);
 303                        if (val < 1 || val > 255)
 304                                return -EINVAL;
 305                        ipc->ttl = val;
 306                        break;
 307                case IP_TOS:
 308                        if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
 309                                val = *(int *)CMSG_DATA(cmsg);
 310                        else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
 311                                val = *(u8 *)CMSG_DATA(cmsg);
 312                        else
 313                                return -EINVAL;
 314                        if (val < 0 || val > 255)
 315                                return -EINVAL;
 316                        ipc->tos = val;
 317                        ipc->priority = rt_tos2priority(ipc->tos);
 318                        break;
 319
 320                default:
 321                        return -EINVAL;
 322                }
 323        }
 324        return 0;
 325}
 326
 327static void ip_ra_destroy_rcu(struct rcu_head *head)
 328{
 329        struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
 330
 331        sock_put(ra->saved_sk);
 332        kfree(ra);
 333}
 334
 335int ip_ra_control(struct sock *sk, unsigned char on,
 336                  void (*destructor)(struct sock *))
 337{
 338        struct ip_ra_chain *ra, *new_ra;
 339        struct ip_ra_chain __rcu **rap;
 340        struct net *net = sock_net(sk);
 341
 342        if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
 343                return -EINVAL;
 344
 345        new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
 346        if (on && !new_ra)
 347                return -ENOMEM;
 348
 349        mutex_lock(&net->ipv4.ra_mutex);
 350        for (rap = &net->ipv4.ra_chain;
 351             (ra = rcu_dereference_protected(*rap,
 352                        lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
 353             rap = &ra->next) {
 354                if (ra->sk == sk) {
 355                        if (on) {
 356                                mutex_unlock(&net->ipv4.ra_mutex);
 357                                kfree(new_ra);
 358                                return -EADDRINUSE;
 359                        }
 360                        /* dont let ip_call_ra_chain() use sk again */
 361                        ra->sk = NULL;
 362                        RCU_INIT_POINTER(*rap, ra->next);
 363                        mutex_unlock(&net->ipv4.ra_mutex);
 364
 365                        if (ra->destructor)
 366                                ra->destructor(sk);
 367                        /*
 368                         * Delay sock_put(sk) and kfree(ra) after one rcu grace
 369                         * period. This guarantee ip_call_ra_chain() dont need
 370                         * to mess with socket refcounts.
 371                         */
 372                        ra->saved_sk = sk;
 373                        call_rcu(&ra->rcu, ip_ra_destroy_rcu);
 374                        return 0;
 375                }
 376        }
 377        if (!new_ra) {
 378                mutex_unlock(&net->ipv4.ra_mutex);
 379                return -ENOBUFS;
 380        }
 381        new_ra->sk = sk;
 382        new_ra->destructor = destructor;
 383
 384        RCU_INIT_POINTER(new_ra->next, ra);
 385        rcu_assign_pointer(*rap, new_ra);
 386        sock_hold(sk);
 387        mutex_unlock(&net->ipv4.ra_mutex);
 388
 389        return 0;
 390}
 391
 392void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 393                   __be16 port, u32 info, u8 *payload)
 394{
 395        struct sock_exterr_skb *serr;
 396
 397        skb = skb_clone(skb, GFP_ATOMIC);
 398        if (!skb)
 399                return;
 400
 401        serr = SKB_EXT_ERR(skb);
 402        serr->ee.ee_errno = err;
 403        serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
 404        serr->ee.ee_type = icmp_hdr(skb)->type;
 405        serr->ee.ee_code = icmp_hdr(skb)->code;
 406        serr->ee.ee_pad = 0;
 407        serr->ee.ee_info = info;
 408        serr->ee.ee_data = 0;
 409        serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) -
 410                                   skb_network_header(skb);
 411        serr->port = port;
 412
 413        if (skb_pull(skb, payload - skb->data)) {
 414                skb_reset_transport_header(skb);
 415                if (sock_queue_err_skb(sk, skb) == 0)
 416                        return;
 417        }
 418        kfree_skb(skb);
 419}
 420
 421void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
 422{
 423        struct inet_sock *inet = inet_sk(sk);
 424        struct sock_exterr_skb *serr;
 425        struct iphdr *iph;
 426        struct sk_buff *skb;
 427
 428        if (!inet->recverr)
 429                return;
 430
 431        skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC);
 432        if (!skb)
 433                return;
 434
 435        skb_put(skb, sizeof(struct iphdr));
 436        skb_reset_network_header(skb);
 437        iph = ip_hdr(skb);
 438        iph->daddr = daddr;
 439
 440        serr = SKB_EXT_ERR(skb);
 441        serr->ee.ee_errno = err;
 442        serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
 443        serr->ee.ee_type = 0;
 444        serr->ee.ee_code = 0;
 445        serr->ee.ee_pad = 0;
 446        serr->ee.ee_info = info;
 447        serr->ee.ee_data = 0;
 448        serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
 449        serr->port = port;
 450
 451        __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
 452        skb_reset_transport_header(skb);
 453
 454        if (sock_queue_err_skb(sk, skb))
 455                kfree_skb(skb);
 456}
 457
 458/* For some errors we have valid addr_offset even with zero payload and
 459 * zero port. Also, addr_offset should be supported if port is set.
 460 */
 461static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr)
 462{
 463        return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
 464               serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port;
 465}
 466
 467/* IPv4 supports cmsg on all imcp errors and some timestamps
 468 *
 469 * Timestamp code paths do not initialize the fields expected by cmsg:
 470 * the PKTINFO fields in skb->cb[]. Fill those in here.
 471 */
 472static bool ipv4_datagram_support_cmsg(const struct sock *sk,
 473                                       struct sk_buff *skb,
 474                                       int ee_origin)
 475{
 476        struct in_pktinfo *info;
 477
 478        if (ee_origin == SO_EE_ORIGIN_ICMP)
 479                return true;
 480
 481        if (ee_origin == SO_EE_ORIGIN_LOCAL)
 482                return false;
 483
 484        /* Support IP_PKTINFO on tstamp packets if requested, to correlate
 485         * timestamp with egress dev. Not possible for packets without iif
 486         * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
 487         */
 488        info = PKTINFO_SKB_CB(skb);
 489        if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
 490            !info->ipi_ifindex)
 491                return false;
 492
 493        info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
 494        return true;
 495}
 496
 497/*
 498 *      Handle MSG_ERRQUEUE
 499 */
 500int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 501{
 502        struct sock_exterr_skb *serr;
 503        struct sk_buff *skb;
 504        DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
 505        struct {
 506                struct sock_extended_err ee;
 507                struct sockaddr_in       offender;
 508        } errhdr;
 509        int err;
 510        int copied;
 511
 512        err = -EAGAIN;
 513        skb = sock_dequeue_err_skb(sk);
 514        if (!skb)
 515                goto out;
 516
 517        copied = skb->len;
 518        if (copied > len) {
 519                msg->msg_flags |= MSG_TRUNC;
 520                copied = len;
 521        }
 522        err = skb_copy_datagram_msg(skb, 0, msg, copied);
 523        if (unlikely(err)) {
 524                kfree_skb(skb);
 525                return err;
 526        }
 527        sock_recv_timestamp(msg, sk, skb);
 528
 529        serr = SKB_EXT_ERR(skb);
 530
 531        if (sin && ipv4_datagram_support_addr(serr)) {
 532                sin->sin_family = AF_INET;
 533                sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
 534                                                   serr->addr_offset);
 535                sin->sin_port = serr->port;
 536                memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
 537                *addr_len = sizeof(*sin);
 538        }
 539
 540        memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
 541        sin = &errhdr.offender;
 542        memset(sin, 0, sizeof(*sin));
 543
 544        if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
 545                sin->sin_family = AF_INET;
 546                sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 547                if (inet_sk(sk)->cmsg_flags)
 548                        ip_cmsg_recv(msg, skb);
 549        }
 550
 551        put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr);
 552
 553        /* Now we could try to dump offended packet options */
 554
 555        msg->msg_flags |= MSG_ERRQUEUE;
 556        err = copied;
 557
 558        consume_skb(skb);
 559out:
 560        return err;
 561}
 562
 563
 564/*
 565 *      Socket option code for IP. This is the end of the line after any
 566 *      TCP,UDP etc options on an IP socket.
 567 */
 568static bool setsockopt_needs_rtnl(int optname)
 569{
 570        switch (optname) {
 571        case IP_ADD_MEMBERSHIP:
 572        case IP_ADD_SOURCE_MEMBERSHIP:
 573        case IP_BLOCK_SOURCE:
 574        case IP_DROP_MEMBERSHIP:
 575        case IP_DROP_SOURCE_MEMBERSHIP:
 576        case IP_MSFILTER:
 577        case IP_UNBLOCK_SOURCE:
 578        case MCAST_BLOCK_SOURCE:
 579        case MCAST_MSFILTER:
 580        case MCAST_JOIN_GROUP:
 581        case MCAST_JOIN_SOURCE_GROUP:
 582        case MCAST_LEAVE_GROUP:
 583        case MCAST_LEAVE_SOURCE_GROUP:
 584        case MCAST_UNBLOCK_SOURCE:
 585                return true;
 586        }
 587        return false;
 588}
 589
 590static int do_ip_setsockopt(struct sock *sk, int level,
 591                            int optname, char __user *optval, unsigned int optlen)
 592{
 593        struct inet_sock *inet = inet_sk(sk);
 594        struct net *net = sock_net(sk);
 595        int val = 0, err;
 596        bool needs_rtnl = setsockopt_needs_rtnl(optname);
 597
 598        switch (optname) {
 599        case IP_PKTINFO:
 600        case IP_RECVTTL:
 601        case IP_RECVOPTS:
 602        case IP_RECVTOS:
 603        case IP_RETOPTS:
 604        case IP_TOS:
 605        case IP_TTL:
 606        case IP_HDRINCL:
 607        case IP_MTU_DISCOVER:
 608        case IP_RECVERR:
 609        case IP_ROUTER_ALERT:
 610        case IP_FREEBIND:
 611        case IP_PASSSEC:
 612        case IP_TRANSPARENT:
 613        case IP_MINTTL:
 614        case IP_NODEFRAG:
 615        case IP_BIND_ADDRESS_NO_PORT:
 616        case IP_UNICAST_IF:
 617        case IP_MULTICAST_TTL:
 618        case IP_MULTICAST_ALL:
 619        case IP_MULTICAST_LOOP:
 620        case IP_RECVORIGDSTADDR:
 621        case IP_CHECKSUM:
 622        case IP_RECVFRAGSIZE:
 623                if (optlen >= sizeof(int)) {
 624                        if (get_user(val, (int __user *) optval))
 625                                return -EFAULT;
 626                } else if (optlen >= sizeof(char)) {
 627                        unsigned char ucval;
 628
 629                        if (get_user(ucval, (unsigned char __user *) optval))
 630                                return -EFAULT;
 631                        val = (int) ucval;
 632                }
 633        }
 634
 635        /* If optlen==0, it is equivalent to val == 0 */
 636
 637        if (optname == IP_ROUTER_ALERT)
 638                return ip_ra_control(sk, val ? 1 : 0, NULL);
 639        if (ip_mroute_opt(optname))
 640                return ip_mroute_setsockopt(sk, optname, optval, optlen);
 641
 642        err = 0;
 643        if (needs_rtnl)
 644                rtnl_lock();
 645        lock_sock(sk);
 646
 647        switch (optname) {
 648        case IP_OPTIONS:
 649        {
 650                struct ip_options_rcu *old, *opt = NULL;
 651
 652                if (optlen > 40)
 653                        goto e_inval;
 654                err = ip_options_get_from_user(sock_net(sk), &opt,
 655                                               optval, optlen);
 656                if (err)
 657                        break;
 658                old = rcu_dereference_protected(inet->inet_opt,
 659                                                lockdep_sock_is_held(sk));
 660                if (inet->is_icsk) {
 661                        struct inet_connection_sock *icsk = inet_csk(sk);
 662#if IS_ENABLED(CONFIG_IPV6)
 663                        if (sk->sk_family == PF_INET ||
 664                            (!((1 << sk->sk_state) &
 665                               (TCPF_LISTEN | TCPF_CLOSE)) &&
 666                             inet->inet_daddr != LOOPBACK4_IPV6)) {
 667#endif
 668                                if (old)
 669                                        icsk->icsk_ext_hdr_len -= old->opt.optlen;
 670                                if (opt)
 671                                        icsk->icsk_ext_hdr_len += opt->opt.optlen;
 672                                icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 673#if IS_ENABLED(CONFIG_IPV6)
 674                        }
 675#endif
 676                }
 677                rcu_assign_pointer(inet->inet_opt, opt);
 678                if (old)
 679                        kfree_rcu(old, rcu);
 680                break;
 681        }
 682        case IP_PKTINFO:
 683                if (val)
 684                        inet->cmsg_flags |= IP_CMSG_PKTINFO;
 685                else
 686                        inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
 687                break;
 688        case IP_RECVTTL:
 689                if (val)
 690                        inet->cmsg_flags |=  IP_CMSG_TTL;
 691                else
 692                        inet->cmsg_flags &= ~IP_CMSG_TTL;
 693                break;
 694        case IP_RECVTOS:
 695                if (val)
 696                        inet->cmsg_flags |=  IP_CMSG_TOS;
 697                else
 698                        inet->cmsg_flags &= ~IP_CMSG_TOS;
 699                break;
 700        case IP_RECVOPTS:
 701                if (val)
 702                        inet->cmsg_flags |=  IP_CMSG_RECVOPTS;
 703                else
 704                        inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
 705                break;
 706        case IP_RETOPTS:
 707                if (val)
 708                        inet->cmsg_flags |= IP_CMSG_RETOPTS;
 709                else
 710                        inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
 711                break;
 712        case IP_PASSSEC:
 713                if (val)
 714                        inet->cmsg_flags |= IP_CMSG_PASSSEC;
 715                else
 716                        inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
 717                break;
 718        case IP_RECVORIGDSTADDR:
 719                if (val)
 720                        inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR;
 721                else
 722                        inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
 723                break;
 724        case IP_CHECKSUM:
 725                if (val) {
 726                        if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) {
 727                                inet_inc_convert_csum(sk);
 728                                inet->cmsg_flags |= IP_CMSG_CHECKSUM;
 729                        }
 730                } else {
 731                        if (inet->cmsg_flags & IP_CMSG_CHECKSUM) {
 732                                inet_dec_convert_csum(sk);
 733                                inet->cmsg_flags &= ~IP_CMSG_CHECKSUM;
 734                        }
 735                }
 736                break;
 737        case IP_RECVFRAGSIZE:
 738                if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
 739                        goto e_inval;
 740                if (val)
 741                        inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
 742                else
 743                        inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
 744                break;
 745        case IP_TOS:    /* This sets both TOS and Precedence */
 746                if (sk->sk_type == SOCK_STREAM) {
 747                        val &= ~INET_ECN_MASK;
 748                        val |= inet->tos & INET_ECN_MASK;
 749                }
 750                if (inet->tos != val) {
 751                        inet->tos = val;
 752                        sk->sk_priority = rt_tos2priority(val);
 753                        sk_dst_reset(sk);
 754                }
 755                break;
 756        case IP_TTL:
 757                if (optlen < 1)
 758                        goto e_inval;
 759                if (val != -1 && (val < 1 || val > 255))
 760                        goto e_inval;
 761                inet->uc_ttl = val;
 762                break;
 763        case IP_HDRINCL:
 764                if (sk->sk_type != SOCK_RAW) {
 765                        err = -ENOPROTOOPT;
 766                        break;
 767                }
 768                inet->hdrincl = val ? 1 : 0;
 769                break;
 770        case IP_NODEFRAG:
 771                if (sk->sk_type != SOCK_RAW) {
 772                        err = -ENOPROTOOPT;
 773                        break;
 774                }
 775                inet->nodefrag = val ? 1 : 0;
 776                break;
 777        case IP_BIND_ADDRESS_NO_PORT:
 778                inet->bind_address_no_port = val ? 1 : 0;
 779                break;
 780        case IP_MTU_DISCOVER:
 781                if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
 782                        goto e_inval;
 783                inet->pmtudisc = val;
 784                break;
 785        case IP_RECVERR:
 786                inet->recverr = !!val;
 787                if (!val)
 788                        skb_queue_purge(&sk->sk_error_queue);
 789                break;
 790        case IP_MULTICAST_TTL:
 791                if (sk->sk_type == SOCK_STREAM)
 792                        goto e_inval;
 793                if (optlen < 1)
 794                        goto e_inval;
 795                if (val == -1)
 796                        val = 1;
 797                if (val < 0 || val > 255)
 798                        goto e_inval;
 799                inet->mc_ttl = val;
 800                break;
 801        case IP_MULTICAST_LOOP:
 802                if (optlen < 1)
 803                        goto e_inval;
 804                inet->mc_loop = !!val;
 805                break;
 806        case IP_UNICAST_IF:
 807        {
 808                struct net_device *dev = NULL;
 809                int ifindex;
 810                int midx;
 811
 812                if (optlen != sizeof(int))
 813                        goto e_inval;
 814
 815                ifindex = (__force int)ntohl((__force __be32)val);
 816                if (ifindex == 0) {
 817                        inet->uc_index = 0;
 818                        err = 0;
 819                        break;
 820                }
 821
 822                dev = dev_get_by_index(sock_net(sk), ifindex);
 823                err = -EADDRNOTAVAIL;
 824                if (!dev)
 825                        break;
 826
 827                midx = l3mdev_master_ifindex(dev);
 828                dev_put(dev);
 829
 830                err = -EINVAL;
 831                if (sk->sk_bound_dev_if &&
 832                    (!midx || midx != sk->sk_bound_dev_if))
 833                        break;
 834
 835                inet->uc_index = ifindex;
 836                err = 0;
 837                break;
 838        }
 839        case IP_MULTICAST_IF:
 840        {
 841                struct ip_mreqn mreq;
 842                struct net_device *dev = NULL;
 843                int midx;
 844
 845                if (sk->sk_type == SOCK_STREAM)
 846                        goto e_inval;
 847                /*
 848                 *      Check the arguments are allowable
 849                 */
 850
 851                if (optlen < sizeof(struct in_addr))
 852                        goto e_inval;
 853
 854                err = -EFAULT;
 855                if (optlen >= sizeof(struct ip_mreqn)) {
 856                        if (copy_from_user(&mreq, optval, sizeof(mreq)))
 857                                break;
 858                } else {
 859                        memset(&mreq, 0, sizeof(mreq));
 860                        if (optlen >= sizeof(struct ip_mreq)) {
 861                                if (copy_from_user(&mreq, optval,
 862                                                   sizeof(struct ip_mreq)))
 863                                        break;
 864                        } else if (optlen >= sizeof(struct in_addr)) {
 865                                if (copy_from_user(&mreq.imr_address, optval,
 866                                                   sizeof(struct in_addr)))
 867                                        break;
 868                        }
 869                }
 870
 871                if (!mreq.imr_ifindex) {
 872                        if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) {
 873                                inet->mc_index = 0;
 874                                inet->mc_addr  = 0;
 875                                err = 0;
 876                                break;
 877                        }
 878                        dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
 879                        if (dev)
 880                                mreq.imr_ifindex = dev->ifindex;
 881                } else
 882                        dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
 883
 884
 885                err = -EADDRNOTAVAIL;
 886                if (!dev)
 887                        break;
 888
 889                midx = l3mdev_master_ifindex(dev);
 890
 891                dev_put(dev);
 892
 893                err = -EINVAL;
 894                if (sk->sk_bound_dev_if &&
 895                    mreq.imr_ifindex != sk->sk_bound_dev_if &&
 896                    (!midx || midx != sk->sk_bound_dev_if))
 897                        break;
 898
 899                inet->mc_index = mreq.imr_ifindex;
 900                inet->mc_addr  = mreq.imr_address.s_addr;
 901                err = 0;
 902                break;
 903        }
 904
 905        case IP_ADD_MEMBERSHIP:
 906        case IP_DROP_MEMBERSHIP:
 907        {
 908                struct ip_mreqn mreq;
 909
 910                err = -EPROTO;
 911                if (inet_sk(sk)->is_icsk)
 912                        break;
 913
 914                if (optlen < sizeof(struct ip_mreq))
 915                        goto e_inval;
 916                err = -EFAULT;
 917                if (optlen >= sizeof(struct ip_mreqn)) {
 918                        if (copy_from_user(&mreq, optval, sizeof(mreq)))
 919                                break;
 920                } else {
 921                        memset(&mreq, 0, sizeof(mreq));
 922                        if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq)))
 923                                break;
 924                }
 925
 926                if (optname == IP_ADD_MEMBERSHIP)
 927                        err = ip_mc_join_group(sk, &mreq);
 928                else
 929                        err = ip_mc_leave_group(sk, &mreq);
 930                break;
 931        }
 932        case IP_MSFILTER:
 933        {
 934                struct ip_msfilter *msf;
 935
 936                if (optlen < IP_MSFILTER_SIZE(0))
 937                        goto e_inval;
 938                if (optlen > sysctl_optmem_max) {
 939                        err = -ENOBUFS;
 940                        break;
 941                }
 942                msf = memdup_user(optval, optlen);
 943                if (IS_ERR(msf)) {
 944                        err = PTR_ERR(msf);
 945                        break;
 946                }
 947                /* numsrc >= (1G-4) overflow in 32 bits */
 948                if (msf->imsf_numsrc >= 0x3ffffffcU ||
 949                    msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
 950                        kfree(msf);
 951                        err = -ENOBUFS;
 952                        break;
 953                }
 954                if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
 955                        kfree(msf);
 956                        err = -EINVAL;
 957                        break;
 958                }
 959                err = ip_mc_msfilter(sk, msf, 0);
 960                kfree(msf);
 961                break;
 962        }
 963        case IP_BLOCK_SOURCE:
 964        case IP_UNBLOCK_SOURCE:
 965        case IP_ADD_SOURCE_MEMBERSHIP:
 966        case IP_DROP_SOURCE_MEMBERSHIP:
 967        {
 968                struct ip_mreq_source mreqs;
 969                int omode, add;
 970
 971                if (optlen != sizeof(struct ip_mreq_source))
 972                        goto e_inval;
 973                if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
 974                        err = -EFAULT;
 975                        break;
 976                }
 977                if (optname == IP_BLOCK_SOURCE) {
 978                        omode = MCAST_EXCLUDE;
 979                        add = 1;
 980                } else if (optname == IP_UNBLOCK_SOURCE) {
 981                        omode = MCAST_EXCLUDE;
 982                        add = 0;
 983                } else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
 984                        struct ip_mreqn mreq;
 985
 986                        mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
 987                        mreq.imr_address.s_addr = mreqs.imr_interface;
 988                        mreq.imr_ifindex = 0;
 989                        err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
 990                        if (err && err != -EADDRINUSE)
 991                                break;
 992                        omode = MCAST_INCLUDE;
 993                        add = 1;
 994                } else /* IP_DROP_SOURCE_MEMBERSHIP */ {
 995                        omode = MCAST_INCLUDE;
 996                        add = 0;
 997                }
 998                err = ip_mc_source(add, omode, sk, &mreqs, 0);
 999                break;
1000        }
1001        case MCAST_JOIN_GROUP:
1002        case MCAST_LEAVE_GROUP:
1003        {
1004                struct group_req greq;
1005                struct sockaddr_in *psin;
1006                struct ip_mreqn mreq;
1007
1008                if (optlen < sizeof(struct group_req))
1009                        goto e_inval;
1010                err = -EFAULT;
1011                if (copy_from_user(&greq, optval, sizeof(greq)))
1012                        break;
1013                psin = (struct sockaddr_in *)&greq.gr_group;
1014                if (psin->sin_family != AF_INET)
1015                        goto e_inval;
1016                memset(&mreq, 0, sizeof(mreq));
1017                mreq.imr_multiaddr = psin->sin_addr;
1018                mreq.imr_ifindex = greq.gr_interface;
1019
1020                if (optname == MCAST_JOIN_GROUP)
1021                        err = ip_mc_join_group(sk, &mreq);
1022                else
1023                        err = ip_mc_leave_group(sk, &mreq);
1024                break;
1025        }
1026        case MCAST_JOIN_SOURCE_GROUP:
1027        case MCAST_LEAVE_SOURCE_GROUP:
1028        case MCAST_BLOCK_SOURCE:
1029        case MCAST_UNBLOCK_SOURCE:
1030        {
1031                struct group_source_req greqs;
1032                struct ip_mreq_source mreqs;
1033                struct sockaddr_in *psin;
1034                int omode, add;
1035
1036                if (optlen != sizeof(struct group_source_req))
1037                        goto e_inval;
1038                if (copy_from_user(&greqs, optval, sizeof(greqs))) {
1039                        err = -EFAULT;
1040                        break;
1041                }
1042                if (greqs.gsr_group.ss_family != AF_INET ||
1043                    greqs.gsr_source.ss_family != AF_INET) {
1044                        err = -EADDRNOTAVAIL;
1045                        break;
1046                }
1047                psin = (struct sockaddr_in *)&greqs.gsr_group;
1048                mreqs.imr_multiaddr = psin->sin_addr.s_addr;
1049                psin = (struct sockaddr_in *)&greqs.gsr_source;
1050                mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
1051                mreqs.imr_interface = 0; /* use index for mc_source */
1052
1053                if (optname == MCAST_BLOCK_SOURCE) {
1054                        omode = MCAST_EXCLUDE;
1055                        add = 1;
1056                } else if (optname == MCAST_UNBLOCK_SOURCE) {
1057                        omode = MCAST_EXCLUDE;
1058                        add = 0;
1059                } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
1060                        struct ip_mreqn mreq;
1061
1062                        psin = (struct sockaddr_in *)&greqs.gsr_group;
1063                        mreq.imr_multiaddr = psin->sin_addr;
1064                        mreq.imr_address.s_addr = 0;
1065                        mreq.imr_ifindex = greqs.gsr_interface;
1066                        err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
1067                        if (err && err != -EADDRINUSE)
1068                                break;
1069                        greqs.gsr_interface = mreq.imr_ifindex;
1070                        omode = MCAST_INCLUDE;
1071                        add = 1;
1072                } else /* MCAST_LEAVE_SOURCE_GROUP */ {
1073                        omode = MCAST_INCLUDE;
1074                        add = 0;
1075                }
1076                err = ip_mc_source(add, omode, sk, &mreqs,
1077                                   greqs.gsr_interface);
1078                break;
1079        }
1080        case MCAST_MSFILTER:
1081        {
1082                struct sockaddr_in *psin;
1083                struct ip_msfilter *msf = NULL;
1084                struct group_filter *gsf = NULL;
1085                int msize, i, ifindex;
1086
1087                if (optlen < GROUP_FILTER_SIZE(0))
1088                        goto e_inval;
1089                if (optlen > sysctl_optmem_max) {
1090                        err = -ENOBUFS;
1091                        break;
1092                }
1093                gsf = memdup_user(optval, optlen);
1094                if (IS_ERR(gsf)) {
1095                        err = PTR_ERR(gsf);
1096                        break;
1097                }
1098
1099                /* numsrc >= (4G-140)/128 overflow in 32 bits */
1100                if (gsf->gf_numsrc >= 0x1ffffff ||
1101                    gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
1102                        err = -ENOBUFS;
1103                        goto mc_msf_out;
1104                }
1105                if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
1106                        err = -EINVAL;
1107                        goto mc_msf_out;
1108                }
1109                msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
1110                msf = kmalloc(msize, GFP_KERNEL);
1111                if (!msf) {
1112                        err = -ENOBUFS;
1113                        goto mc_msf_out;
1114                }
1115                ifindex = gsf->gf_interface;
1116                psin = (struct sockaddr_in *)&gsf->gf_group;
1117                if (psin->sin_family != AF_INET) {
1118                        err = -EADDRNOTAVAIL;
1119                        goto mc_msf_out;
1120                }
1121                msf->imsf_multiaddr = psin->sin_addr.s_addr;
1122                msf->imsf_interface = 0;
1123                msf->imsf_fmode = gsf->gf_fmode;
1124                msf->imsf_numsrc = gsf->gf_numsrc;
1125                err = -EADDRNOTAVAIL;
1126                for (i = 0; i < gsf->gf_numsrc; ++i) {
1127                        psin = (struct sockaddr_in *)&gsf->gf_slist[i];
1128
1129                        if (psin->sin_family != AF_INET)
1130                                goto mc_msf_out;
1131                        msf->imsf_slist[i] = psin->sin_addr.s_addr;
1132                }
1133                kfree(gsf);
1134                gsf = NULL;
1135
1136                err = ip_mc_msfilter(sk, msf, ifindex);
1137mc_msf_out:
1138                kfree(msf);
1139                kfree(gsf);
1140                break;
1141        }
1142        case IP_MULTICAST_ALL:
1143                if (optlen < 1)
1144                        goto e_inval;
1145                if (val != 0 && val != 1)
1146                        goto e_inval;
1147                inet->mc_all = val;
1148                break;
1149
1150        case IP_FREEBIND:
1151                if (optlen < 1)
1152                        goto e_inval;
1153                inet->freebind = !!val;
1154                break;
1155
1156        case IP_IPSEC_POLICY:
1157        case IP_XFRM_POLICY:
1158                err = -EPERM;
1159                if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1160                        break;
1161                err = xfrm_user_policy(sk, optname, optval, optlen);
1162                break;
1163
1164        case IP_TRANSPARENT:
1165                if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
1166                    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1167                        err = -EPERM;
1168                        break;
1169                }
1170                if (optlen < 1)
1171                        goto e_inval;
1172                inet->transparent = !!val;
1173                break;
1174
1175        case IP_MINTTL:
1176                if (optlen < 1)
1177                        goto e_inval;
1178                if (val < 0 || val > 255)
1179                        goto e_inval;
1180                inet->min_ttl = val;
1181                break;
1182
1183        default:
1184                err = -ENOPROTOOPT;
1185                break;
1186        }
1187        release_sock(sk);
1188        if (needs_rtnl)
1189                rtnl_unlock();
1190        return err;
1191
1192e_inval:
1193        release_sock(sk);
1194        if (needs_rtnl)
1195                rtnl_unlock();
1196        return -EINVAL;
1197}
1198
1199/**
1200 * ipv4_pktinfo_prepare - transfer some info from rtable to skb
1201 * @sk: socket
1202 * @skb: buffer
1203 *
1204 * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
1205 * destination in skb->cb[] before dst drop.
1206 * This way, receiver doesn't make cache line misses to read rtable.
1207 */
1208void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
1209{
1210        struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
1211        bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
1212                       ipv6_sk_rxinfo(sk);
1213
1214        if (prepare && skb_rtable(skb)) {
1215                /* skb->cb is overloaded: prior to this point it is IP{6}CB
1216                 * which has interface index (iif) as the first member of the
1217                 * underlying inet{6}_skb_parm struct. This code then overlays
1218                 * PKTINFO_SKB_CB and in_pktinfo also has iif as the first
1219                 * element so the iif is picked up from the prior IPCB. If iif
1220                 * is the loopback interface, then return the sending interface
1221                 * (e.g., process binds socket to eth0 for Tx which is
1222                 * redirected to loopback in the rtable/dst).
1223                 */
1224                struct rtable *rt = skb_rtable(skb);
1225                bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);
1226
1227                if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
1228                        pktinfo->ipi_ifindex = inet_iif(skb);
1229                else if (l3slave && rt && rt->rt_iif)
1230                        pktinfo->ipi_ifindex = rt->rt_iif;
1231
1232                pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
1233        } else {
1234                pktinfo->ipi_ifindex = 0;
1235                pktinfo->ipi_spec_dst.s_addr = 0;
1236        }
1237        skb_dst_drop(skb);
1238}
1239
1240int ip_setsockopt(struct sock *sk, int level,
1241                int optname, char __user *optval, unsigned int optlen)
1242{
1243        int err;
1244
1245        if (level != SOL_IP)
1246                return -ENOPROTOOPT;
1247
1248        err = do_ip_setsockopt(sk, level, optname, optval, optlen);
1249#if IS_ENABLED(CONFIG_BPFILTER_UMH)
1250        if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
1251            optname < BPFILTER_IPT_SET_MAX)
1252                err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
1253#endif
1254#ifdef CONFIG_NETFILTER
1255        /* we need to exclude all possible ENOPROTOOPTs except default case */
1256        if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
1257                        optname != IP_IPSEC_POLICY &&
1258                        optname != IP_XFRM_POLICY &&
1259                        !ip_mroute_opt(optname))
1260                err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
1261#endif
1262        return err;
1263}
1264EXPORT_SYMBOL(ip_setsockopt);
1265
#ifdef CONFIG_COMPAT
/*
 *      Compat variant of ip_setsockopt().
 *
 *      Any level other than SOL_IP yields -ENOPROTOOPT. Optnames in the
 *      range MCAST_JOIN_GROUP..MCAST_MSFILTER are handed to
 *      compat_mc_setsockopt() together with ip_setsockopt as the
 *      callback. Everything else goes to do_ip_setsockopt() and, with
 *      CONFIG_NETFILTER, falls back to compat_nf_setsockopt() on
 *      -ENOPROTOOPT, except for the options listed below.
 */
int compat_ip_setsockopt(struct sock *sk, int level, int optname,
                         char __user *optval, unsigned int optlen)
{
        bool mcast_group_opt;
        int ret;

        if (level != SOL_IP)
                return -ENOPROTOOPT;

        mcast_group_opt = optname >= MCAST_JOIN_GROUP &&
                          optname <= MCAST_MSFILTER;
        if (mcast_group_opt)
                return compat_mc_setsockopt(sk, level, optname, optval, optlen,
                                            ip_setsockopt);

        ret = do_ip_setsockopt(sk, level, optname, optval, optlen);
#ifdef CONFIG_NETFILTER
        /* we need to exclude all possible ENOPROTOOPTs except default case */
        if (ret == -ENOPROTOOPT) {
                switch (optname) {
                case IP_HDRINCL:
                case IP_IPSEC_POLICY:
                case IP_XFRM_POLICY:
                        break;
                default:
                        if (!ip_mroute_opt(optname))
                                ret = compat_nf_setsockopt(sk, PF_INET,
                                                           optname, optval,
                                                           optlen);
                        break;
                }
        }
#endif
        return ret;
}
EXPORT_SYMBOL(compat_ip_setsockopt);
#endif
1293
1294/*
1295 *      Get the options. Note for future reference. The GET of IP options gets
1296 *      the _received_ ones. The set sets the _sent_ ones.
1297 */
1298
1299static bool getsockopt_needs_rtnl(int optname)
1300{
1301        switch (optname) {
1302        case IP_MSFILTER:
1303        case MCAST_MSFILTER:
1304                return true;
1305        }
1306        return false;
1307}
1308
1309static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1310                            char __user *optval, int __user *optlen, unsigned int flags)
1311{
            /*
             * Read one SOL_IP option into the user buffer optval/optlen.
             * Returns 0 on success or a negative errno.
             *
             * Options that are a single integer only set val inside the
             * switch and are copied out by the common code after it.
             * Options with their own output layout copy to user space and
             * return (or jump to out) from inside their case.
             *
             * flags is only consumed by IP_PKTOPTIONS, which stores it in
             * msg.msg_flags.
             */
1312        struct inet_sock *inet = inet_sk(sk);
1313        bool needs_rtnl = getsockopt_needs_rtnl(optname);
1314        int val, err = 0;
1315        int len;
1316
1317        if (level != SOL_IP)
1318                return -EOPNOTSUPP;
1319
            /* Multicast-routing options are delegated before any lock is taken. */
1320        if (ip_mroute_opt(optname))
1321                return ip_mroute_getsockopt(sk, optname, optval, optlen);
1322
            /* len is the caller-supplied buffer size; negative sizes are rejected. */
1323        if (get_user(len, optlen))
1324                return -EFAULT;
1325        if (len < 0)
1326                return -EINVAL;
1327
            /* RTNL, when needed, is taken before the socket lock. */
1328        if (needs_rtnl)
1329                rtnl_lock();
1330        lock_sock(sk);
1331
1332        switch (optname) {
1333        case IP_OPTIONS:
1334        {
                    /* Room for struct ip_options plus 40 bytes of option data. */
1335                unsigned char optbuf[sizeof(struct ip_options)+40];
1336                struct ip_options *opt = (struct ip_options *)optbuf;
1337                struct ip_options_rcu *inet_opt;
1338
1339                inet_opt = rcu_dereference_protected(inet->inet_opt,
1340                                                     lockdep_sock_is_held(sk));
1341                opt->optlen = 0;
                    /* Snapshot the options into optbuf while the socket is
                     * locked; the copy to user space happens after unlock.
                     */
1342                if (inet_opt)
1343                        memcpy(optbuf, &inet_opt->opt,
1344                               sizeof(struct ip_options) +
1345                               inet_opt->opt.optlen);
1346                release_sock(sk);
1347
                    /* No options set: report a length of zero. */
1348                if (opt->optlen == 0)
1349                        return put_user(0, optlen);
1350
1351                ip_options_undo(opt);
1352
                    /* Truncate to the caller's buffer size. */
1353                len = min_t(unsigned int, len, opt->optlen);
1354                if (put_user(len, optlen))
1355                        return -EFAULT;
1356                if (copy_to_user(optval, opt->__data, len))
1357                        return -EFAULT;
1358                return 0;
1359        }
            /* The following cases report whether a cmsg_flags bit is set (0 or 1). */
1360        case IP_PKTINFO:
1361                val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
1362                break;
1363        case IP_RECVTTL:
1364                val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
1365                break;
1366        case IP_RECVTOS:
1367                val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
1368                break;
1369        case IP_RECVOPTS:
1370                val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
1371                break;
1372        case IP_RETOPTS:
1373                val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
1374                break;
1375        case IP_PASSSEC:
1376                val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
1377                break;
1378        case IP_RECVORIGDSTADDR:
1379                val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0;
1380                break;
1381        case IP_CHECKSUM:
1382                val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
1383                break;
1384        case IP_RECVFRAGSIZE:
1385                val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0;
1386                break;
1387        case IP_TOS:
1388                val = inet->tos;
1389                break;
1390        case IP_TTL:
1391        {
                    /* A uc_ttl of -1 selects the per-namespace default TTL. */
1392                struct net *net = sock_net(sk);
1393                val = (inet->uc_ttl == -1 ?
1394                       net->ipv4.sysctl_ip_default_ttl :
1395                       inet->uc_ttl);
1396                break;
1397        }
1398        case IP_HDRINCL:
1399                val = inet->hdrincl;
1400                break;
1401        case IP_NODEFRAG:
1402                val = inet->nodefrag;
1403                break;
1404        case IP_BIND_ADDRESS_NO_PORT:
1405                val = inet->bind_address_no_port;
1406                break;
1407        case IP_MTU_DISCOVER:
1408                val = inet->pmtudisc;
1409                break;
1410        case IP_MTU:
1411        {
                    /* MTU of the socket's dst; no dst or a zero MTU is
                     * reported as -ENOTCONN.
                     */
1412                struct dst_entry *dst;
1413                val = 0;
1414                dst = sk_dst_get(sk);
1415                if (dst) {
1416                        val = dst_mtu(dst);
1417                        dst_release(dst);
1418                }
1419                if (!val) {
1420                        release_sock(sk);
1421                        return -ENOTCONN;
1422                }
1423                break;
1424        }
1425        case IP_RECVERR:
1426                val = inet->recverr;
1427                break;
1428        case IP_MULTICAST_TTL:
1429                val = inet->mc_ttl;
1430                break;
1431        case IP_MULTICAST_LOOP:
1432                val = inet->mc_loop;
1433                break;
1434        case IP_UNICAST_IF:
                    /* uc_index is handed back in network byte order. */
1435                val = (__force int)htonl((__u32) inet->uc_index);
1436                break;
1437        case IP_MULTICAST_IF:
1438        {
                    /* Only mc_addr is returned, truncated to the caller's
                     * buffer size.
                     */
1439                struct in_addr addr;
1440                len = min_t(unsigned int, len, sizeof(struct in_addr));
1441                addr.s_addr = inet->mc_addr;
1442                release_sock(sk);
1443
1444                if (put_user(len, optlen))
1445                        return -EFAULT;
1446                if (copy_to_user(optval, &addr, len))
1447                        return -EFAULT;
1448                return 0;
1449        }
1450        case IP_MSFILTER:
1451        {
                    /* Only the fixed-size header is read here;
                     * ip_mc_msfget() is handed the user buffer and optlen.
                     * Leaves through out, which also drops RTNL.
                     */
1452                struct ip_msfilter msf;
1453
1454                if (len < IP_MSFILTER_SIZE(0)) {
1455                        err = -EINVAL;
1456                        goto out;
1457                }
1458                if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
1459                        err = -EFAULT;
1460                        goto out;
1461                }
1462                err = ip_mc_msfget(sk, &msf,
1463                                   (struct ip_msfilter __user *)optval, optlen);
1464                goto out;
1465        }
1466        case MCAST_MSFILTER:
1467        {
                    /* Same shape as IP_MSFILTER, using struct group_filter
                     * and ip_mc_gsfget().
                     */
1468                struct group_filter gsf;
1469
1470                if (len < GROUP_FILTER_SIZE(0)) {
1471                        err = -EINVAL;
1472                        goto out;
1473                }
1474                if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
1475                        err = -EFAULT;
1476                        goto out;
1477                }
1478                err = ip_mc_gsfget(sk, &gsf,
1479                                   (struct group_filter __user *)optval,
1480                                   optlen);
1481                goto out;
1482        }
1483        case IP_MULTICAST_ALL:
1484                val = inet->mc_all;
1485                break;
1486        case IP_PKTOPTIONS:
1487        {
                    /* Emits control messages straight into the user buffer
                     * through put_cmsg(). The socket lock is dropped first.
                     */
1488                struct msghdr msg;
1489
1490                release_sock(sk);
1491
                    /* Only offered on SOCK_STREAM sockets. */
1492                if (sk->sk_type != SOCK_STREAM)
1493                        return -ENOPROTOOPT;
1494
1495                msg.msg_control = (__force void *) optval;
1496                msg.msg_controllen = len;
1497                msg.msg_flags = flags;
1498
1499                if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
1500                        struct in_pktinfo info;
1501
1502                        info.ipi_addr.s_addr = inet->inet_rcv_saddr;
1503                        info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr;
1504                        info.ipi_ifindex = inet->mc_index;
1505                        put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
1506                }
1507                if (inet->cmsg_flags & IP_CMSG_TTL) {
1508                        int hlim = inet->mc_ttl;
1509                        put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
1510                }
1511                if (inet->cmsg_flags & IP_CMSG_TOS) {
1512                        int tos = inet->rcv_tos;
1513                        put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
1514                }
                    /* Report the original size minus msg.msg_controllen as
                     * left by the put_cmsg() calls above.
                     */
1515                len -= msg.msg_controllen;
1516                return put_user(len, optlen);
1517        }
1518        case IP_FREEBIND:
1519                val = inet->freebind;
1520                break;
1521        case IP_TRANSPARENT:
1522                val = inet->transparent;
1523                break;
1524        case IP_MINTTL:
1525                val = inet->min_ttl;
1526                break;
1527        default:
1528                release_sock(sk);
1529                return -ENOPROTOOPT;
1530        }
            /* Common copy-out for the integer options. RTNL is not held on
             * this path: both optnames for which getsockopt_needs_rtnl()
             * returns true leave through out instead.
             */
1531        release_sock(sk);
1532
            /* A non-empty buffer shorter than an int gets a single byte when
             * val fits in 0..255; otherwise min(sizeof(int), len) bytes of
             * val are copied.
             */
1533        if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
1534                unsigned char ucval = (unsigned char)val;
1535                len = 1;
1536                if (put_user(len, optlen))
1537                        return -EFAULT;
1538                if (copy_to_user(optval, &ucval, 1))
1539                        return -EFAULT;
1540        } else {
1541                len = min_t(unsigned int, sizeof(int), len);
1542                if (put_user(len, optlen))
1543                        return -EFAULT;
1544                if (copy_to_user(optval, &val, len))
1545                        return -EFAULT;
1546        }
1547        return 0;
1548
            /* Exit for the source-filter cases: drop the socket lock and,
             * if it was taken, RTNL.
             */
1549out:
1550        release_sock(sk);
1551        if (needs_rtnl)
1552                rtnl_unlock();
1553        return err;
1554}
1555
1556int ip_getsockopt(struct sock *sk, int level,
1557                  int optname, char __user *optval, int __user *optlen)
1558{
1559        int err;
1560
1561        err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
1562#if IS_ENABLED(CONFIG_BPFILTER_UMH)
1563        if (optname >= BPFILTER_IPT_SO_GET_INFO &&
1564            optname < BPFILTER_IPT_GET_MAX)
1565                err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
1566#endif
1567#ifdef CONFIG_NETFILTER
1568        /* we need to exclude all possible ENOPROTOOPTs except default case */
1569        if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
1570                        !ip_mroute_opt(optname)) {
1571                int len;
1572
1573                if (get_user(len, optlen))
1574                        return -EFAULT;
1575
1576                err = nf_getsockopt(sk, PF_INET, optname, optval, &len);
1577                if (err >= 0)
1578                        err = put_user(len, optlen);
1579                return err;
1580        }
1581#endif
1582        return err;
1583}
1584EXPORT_SYMBOL(ip_getsockopt);
1585
#ifdef CONFIG_COMPAT
/*
 *      Compat variant of ip_getsockopt().
 *
 *      MCAST_MSFILTER is handed to compat_mc_getsockopt() together with
 *      ip_getsockopt as the callback. Everything else goes to
 *      do_ip_getsockopt() with MSG_CMSG_COMPAT as flags, followed by the
 *      bpfilter override (CONFIG_BPFILTER_UMH) and the
 *      compat_nf_getsockopt() fallback on -ENOPROTOOPT
 *      (CONFIG_NETFILTER).
 */
int compat_ip_getsockopt(struct sock *sk, int level, int optname,
                         char __user *optval, int __user *optlen)
{
        int ret;

        if (optname == MCAST_MSFILTER)
                return compat_mc_getsockopt(sk, level, optname, optval, optlen,
                                            ip_getsockopt);

        ret = do_ip_getsockopt(sk, level, optname, optval, optlen,
                               MSG_CMSG_COMPAT);

#if IS_ENABLED(CONFIG_BPFILTER_UMH)
        if (optname >= BPFILTER_IPT_SO_GET_INFO &&
            optname < BPFILTER_IPT_GET_MAX)
                ret = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
#endif
#ifdef CONFIG_NETFILTER
        /* we need to exclude all possible ENOPROTOOPTs except default case */
        if (ret == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
            !ip_mroute_opt(optname)) {
                int nf_len;

                if (get_user(nf_len, optlen))
                        return -EFAULT;

                ret = compat_nf_getsockopt(sk, PF_INET, optname, optval,
                                           &nf_len);
                if (ret < 0)
                        return ret;
                /* Hand the length updated by netfilter back to the caller. */
                return put_user(nf_len, optlen);
        }
#endif
        return ret;
}
EXPORT_SYMBOL(compat_ip_getsockopt);
#endif
1623