linux/net/ipv6/icmp.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *      Internet Control Message Protocol (ICMPv6)
   4 *      Linux INET6 implementation
   5 *
   6 *      Authors:
   7 *      Pedro Roque             <roque@di.fc.ul.pt>
   8 *
   9 *      Based on net/ipv4/icmp.c
  10 *
  11 *      RFC 1885
  12 */
  13
  14/*
  15 *      Changes:
  16 *
  17 *      Andi Kleen              :       exception handling
  18 *      Andi Kleen                      add rate limits. never reply to a icmp.
  19 *                                      add more length checks and other fixes.
  20 *      yoshfuji                :       ensure to sent parameter problem for
  21 *                                      fragments.
  22 *      YOSHIFUJI Hideaki @USAGI:       added sysctl for icmp rate limit.
  23 *      Randy Dunlap and
  24 *      YOSHIFUJI Hideaki @USAGI:       Per-interface statistics support
  25 *      Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
  26 */
  27
  28#define pr_fmt(fmt) "IPv6: " fmt
  29
  30#include <linux/module.h>
  31#include <linux/errno.h>
  32#include <linux/types.h>
  33#include <linux/socket.h>
  34#include <linux/in.h>
  35#include <linux/kernel.h>
  36#include <linux/sockios.h>
  37#include <linux/net.h>
  38#include <linux/skbuff.h>
  39#include <linux/init.h>
  40#include <linux/netfilter.h>
  41#include <linux/slab.h>
  42
  43#ifdef CONFIG_SYSCTL
  44#include <linux/sysctl.h>
  45#endif
  46
  47#include <linux/inet.h>
  48#include <linux/netdevice.h>
  49#include <linux/icmpv6.h>
  50
  51#include <net/ip.h>
  52#include <net/sock.h>
  53
  54#include <net/ipv6.h>
  55#include <net/ip6_checksum.h>
  56#include <net/ping.h>
  57#include <net/protocol.h>
  58#include <net/raw.h>
  59#include <net/rawv6.h>
  60#include <net/transp_v6.h>
  61#include <net/ip6_route.h>
  62#include <net/addrconf.h>
  63#include <net/icmp.h>
  64#include <net/xfrm.h>
  65#include <net/inet_common.h>
  66#include <net/dsfield.h>
  67#include <net/l3mdev.h>
  68
  69#include <linux/uaccess.h>
  70
  71/*
  72 *      The ICMP socket(s). This is the most convenient way to flow control
  73 *      our ICMP output as well as maintain a clean interface throughout
  74 *      all layers. All Socketless IP sends will soon be gone.
  75 *
  76 *      On SMP we have one ICMP socket per-cpu.
  77 */
  78static struct sock *icmpv6_sk(struct net *net)
  79{
  80        return this_cpu_read(*net->ipv6.icmp_sk);
  81}
  82
  83static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
  84                       u8 type, u8 code, int offset, __be32 info)
  85{
  86        /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
  87        struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
  88        struct net *net = dev_net(skb->dev);
  89
  90        if (type == ICMPV6_PKT_TOOBIG)
  91                ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
  92        else if (type == NDISC_REDIRECT)
  93                ip6_redirect(skb, net, skb->dev->ifindex, 0,
  94                             sock_net_uid(net, NULL));
  95
  96        if (!(type & ICMPV6_INFOMSG_MASK))
  97                if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
  98                        ping_err(skb, offset, ntohl(info));
  99
 100        return 0;
 101}
 102
 103static int icmpv6_rcv(struct sk_buff *skb);
 104
 105static const struct inet6_protocol icmpv6_protocol = {
 106        .handler        =       icmpv6_rcv,
 107        .err_handler    =       icmpv6_err,
 108        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 109};
 110
 111/* Called with BH disabled */
 112static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
 113{
 114        struct sock *sk;
 115
 116        sk = icmpv6_sk(net);
 117        if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
 118                /* This can happen if the output path (f.e. SIT or
 119                 * ip6ip6 tunnel) signals dst_link_failure() for an
 120                 * outgoing ICMP6 packet.
 121                 */
 122                return NULL;
 123        }
 124        return sk;
 125}
 126
 127static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
 128{
 129        spin_unlock(&sk->sk_lock.slock);
 130}
 131
 132/*
 133 * Figure out, may we reply to this packet with icmp error.
 134 *
 135 * We do not reply, if:
 136 *      - it was icmp error message.
 137 *      - it is truncated, so that it is known, that protocol is ICMPV6
 138 *        (i.e. in the middle of some exthdr)
 139 *
 140 *      --ANK (980726)
 141 */
 142
 143static bool is_ineligible(const struct sk_buff *skb)
 144{
 145        int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
 146        int len = skb->len - ptr;
 147        __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 148        __be16 frag_off;
 149
 150        if (len < 0)
 151                return true;
 152
 153        ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
 154        if (ptr < 0)
 155                return false;
 156        if (nexthdr == IPPROTO_ICMPV6) {
 157                u8 _type, *tp;
 158                tp = skb_header_pointer(skb,
 159                        ptr+offsetof(struct icmp6hdr, icmp6_type),
 160                        sizeof(_type), &_type);
 161
 162                /* Based on RFC 8200, Section 4.5 Fragment Header, return
 163                 * false if this is a fragment packet with no icmp header info.
 164                 */
 165                if (!tp && frag_off != 0)
 166                        return false;
 167                else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
 168                        return true;
 169        }
 170        return false;
 171}
 172
 173static bool icmpv6_mask_allow(struct net *net, int type)
 174{
 175        if (type > ICMPV6_MSG_MAX)
 176                return true;
 177
 178        /* Limit if icmp type is set in ratemask. */
 179        if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
 180                return true;
 181
 182        return false;
 183}
 184
 185static bool icmpv6_global_allow(struct net *net, int type)
 186{
 187        if (icmpv6_mask_allow(net, type))
 188                return true;
 189
 190        if (icmp_global_allow())
 191                return true;
 192
 193        return false;
 194}
 195
 196/*
 197 * Check the ICMP output rate limit
 198 */
 199static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
 200                               struct flowi6 *fl6)
 201{
 202        struct net *net = sock_net(sk);
 203        struct dst_entry *dst;
 204        bool res = false;
 205
 206        if (icmpv6_mask_allow(net, type))
 207                return true;
 208
 209        /*
 210         * Look up the output route.
 211         * XXX: perhaps the expire for routing entries cloned by
 212         * this lookup should be more aggressive (not longer than timeout).
 213         */
 214        dst = ip6_route_output(net, sk, fl6);
 215        if (dst->error) {
 216                IP6_INC_STATS(net, ip6_dst_idev(dst),
 217                              IPSTATS_MIB_OUTNOROUTES);
 218        } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
 219                res = true;
 220        } else {
 221                struct rt6_info *rt = (struct rt6_info *)dst;
 222                int tmo = net->ipv6.sysctl.icmpv6_time;
 223                struct inet_peer *peer;
 224
 225                /* Give more bandwidth to wider prefixes. */
 226                if (rt->rt6i_dst.plen < 128)
 227                        tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 228
 229                peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
 230                res = inet_peer_xrlim_allow(peer, tmo);
 231                if (peer)
 232                        inet_putpeer(peer);
 233        }
 234        dst_release(dst);
 235        return res;
 236}
 237
 238static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
 239                                  struct flowi6 *fl6)
 240{
 241        struct net *net = sock_net(sk);
 242        struct dst_entry *dst;
 243        bool res = false;
 244
 245        dst = ip6_route_output(net, sk, fl6);
 246        if (!dst->error) {
 247                struct rt6_info *rt = (struct rt6_info *)dst;
 248                struct in6_addr prefsrc;
 249
 250                rt6_get_prefsrc(rt, &prefsrc);
 251                res = !ipv6_addr_any(&prefsrc);
 252        }
 253        dst_release(dst);
 254        return res;
 255}
 256
 257/*
 258 *      an inline helper for the "simple" if statement below
 259 *      checks if parameter problem report is caused by an
 260 *      unrecognized IPv6 option that has the Option Type
 261 *      highest-order two bits set to 10
 262 */
 263
 264static bool opt_unrec(struct sk_buff *skb, __u32 offset)
 265{
 266        u8 _optval, *op;
 267
 268        offset += skb_network_offset(skb);
 269        op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
 270        if (!op)
 271                return true;
 272        return (*op & 0xC0) == 0x80;
 273}
 274
 275void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
 276                                struct icmp6hdr *thdr, int len)
 277{
 278        struct sk_buff *skb;
 279        struct icmp6hdr *icmp6h;
 280
 281        skb = skb_peek(&sk->sk_write_queue);
 282        if (!skb)
 283                return;
 284
 285        icmp6h = icmp6_hdr(skb);
 286        memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
 287        icmp6h->icmp6_cksum = 0;
 288
 289        if (skb_queue_len(&sk->sk_write_queue) == 1) {
 290                skb->csum = csum_partial(icmp6h,
 291                                        sizeof(struct icmp6hdr), skb->csum);
 292                icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
 293                                                      &fl6->daddr,
 294                                                      len, fl6->flowi6_proto,
 295                                                      skb->csum);
 296        } else {
 297                __wsum tmp_csum = 0;
 298
 299                skb_queue_walk(&sk->sk_write_queue, skb) {
 300                        tmp_csum = csum_add(tmp_csum, skb->csum);
 301                }
 302
 303                tmp_csum = csum_partial(icmp6h,
 304                                        sizeof(struct icmp6hdr), tmp_csum);
 305                icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
 306                                                      &fl6->daddr,
 307                                                      len, fl6->flowi6_proto,
 308                                                      tmp_csum);
 309        }
 310        ip6_push_pending_frames(sk);
 311}
 312
 313struct icmpv6_msg {
 314        struct sk_buff  *skb;
 315        int             offset;
 316        uint8_t         type;
 317};
 318
 319static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
 320{
 321        struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
 322        struct sk_buff *org_skb = msg->skb;
 323        __wsum csum;
 324
 325        csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
 326                                      to, len);
 327        skb->csum = csum_block_add(skb->csum, csum, odd);
 328        if (!(msg->type & ICMPV6_INFOMSG_MASK))
 329                nf_ct_attach(skb, org_skb);
 330        return 0;
 331}
 332
 333#if IS_ENABLED(CONFIG_IPV6_MIP6)
 334static void mip6_addr_swap(struct sk_buff *skb)
 335{
 336        struct ipv6hdr *iph = ipv6_hdr(skb);
 337        struct inet6_skb_parm *opt = IP6CB(skb);
 338        struct ipv6_destopt_hao *hao;
 339        struct in6_addr tmp;
 340        int off;
 341
 342        if (opt->dsthao) {
 343                off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
 344                if (likely(off >= 0)) {
 345                        hao = (struct ipv6_destopt_hao *)
 346                                        (skb_network_header(skb) + off);
 347                        tmp = iph->saddr;
 348                        iph->saddr = hao->addr;
 349                        hao->addr = tmp;
 350                }
 351        }
 352}
 353#else
 354static inline void mip6_addr_swap(struct sk_buff *skb) {}
 355#endif
 356
 357static struct dst_entry *icmpv6_route_lookup(struct net *net,
 358                                             struct sk_buff *skb,
 359                                             struct sock *sk,
 360                                             struct flowi6 *fl6)
 361{
 362        struct dst_entry *dst, *dst2;
 363        struct flowi6 fl2;
 364        int err;
 365
 366        err = ip6_dst_lookup(net, sk, &dst, fl6);
 367        if (err)
 368                return ERR_PTR(err);
 369
 370        /*
 371         * We won't send icmp if the destination is known
 372         * anycast.
 373         */
 374        if (ipv6_anycast_destination(dst, &fl6->daddr)) {
 375                net_dbg_ratelimited("icmp6_send: acast source\n");
 376                dst_release(dst);
 377                return ERR_PTR(-EINVAL);
 378        }
 379
 380        /* No need to clone since we're just using its address. */
 381        dst2 = dst;
 382
 383        dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
 384        if (!IS_ERR(dst)) {
 385                if (dst != dst2)
 386                        return dst;
 387        } else {
 388                if (PTR_ERR(dst) == -EPERM)
 389                        dst = NULL;
 390                else
 391                        return dst;
 392        }
 393
 394        err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
 395        if (err)
 396                goto relookup_failed;
 397
 398        err = ip6_dst_lookup(net, sk, &dst2, &fl2);
 399        if (err)
 400                goto relookup_failed;
 401
 402        dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
 403        if (!IS_ERR(dst2)) {
 404                dst_release(dst);
 405                dst = dst2;
 406        } else {
 407                err = PTR_ERR(dst2);
 408                if (err == -EPERM) {
 409                        dst_release(dst);
 410                        return dst2;
 411                } else
 412                        goto relookup_failed;
 413        }
 414
 415relookup_failed:
 416        if (dst)
 417                return dst;
 418        return ERR_PTR(err);
 419}
 420
 421static struct net_device *icmp6_dev(const struct sk_buff *skb)
 422{
 423        struct net_device *dev = skb->dev;
 424
 425        /* for local traffic to local address, skb dev is the loopback
 426         * device. Check if there is a dst attached to the skb and if so
 427         * get the real device index. Same is needed for replies to a link
 428         * local address on a device enslaved to an L3 master device
 429         */
 430        if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
 431                const struct rt6_info *rt6 = skb_rt6_info(skb);
 432
 433                if (rt6)
 434                        dev = rt6->rt6i_idev->dev;
 435        }
 436
 437        return dev;
 438}
 439
 440static int icmp6_iif(const struct sk_buff *skb)
 441{
 442        return icmp6_dev(skb)->ifindex;
 443}
 444
 445/*
 446 *      Send an ICMP message in response to a packet in error
 447 */
 448void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 449                const struct in6_addr *force_saddr)
 450{
 451        struct inet6_dev *idev = NULL;
 452        struct ipv6hdr *hdr = ipv6_hdr(skb);
 453        struct sock *sk;
 454        struct net *net;
 455        struct ipv6_pinfo *np;
 456        const struct in6_addr *saddr = NULL;
 457        struct dst_entry *dst;
 458        struct icmp6hdr tmp_hdr;
 459        struct flowi6 fl6;
 460        struct icmpv6_msg msg;
 461        struct ipcm6_cookie ipc6;
 462        int iif = 0;
 463        int addr_type = 0;
 464        int len;
 465        u32 mark;
 466
 467        if ((u8 *)hdr < skb->head ||
 468            (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
 469                return;
 470
 471        if (!skb->dev)
 472                return;
 473        net = dev_net(skb->dev);
 474        mark = IP6_REPLY_MARK(net, skb->mark);
 475        /*
 476         *      Make sure we respect the rules
 477         *      i.e. RFC 1885 2.4(e)
 478         *      Rule (e.1) is enforced by not using icmp6_send
 479         *      in any code that processes icmp errors.
 480         */
 481        addr_type = ipv6_addr_type(&hdr->daddr);
 482
 483        if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
 484            ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
 485                saddr = &hdr->daddr;
 486
 487        /*
 488         *      Dest addr check
 489         */
 490
 491        if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
 492                if (type != ICMPV6_PKT_TOOBIG &&
 493                    !(type == ICMPV6_PARAMPROB &&
 494                      code == ICMPV6_UNK_OPTION &&
 495                      (opt_unrec(skb, info))))
 496                        return;
 497
 498                saddr = NULL;
 499        }
 500
 501        addr_type = ipv6_addr_type(&hdr->saddr);
 502
 503        /*
 504         *      Source addr check
 505         */
 506
 507        if (__ipv6_addr_needs_scope_id(addr_type)) {
 508                iif = icmp6_iif(skb);
 509        } else {
 510                /*
 511                 * The source device is used for looking up which routing table
 512                 * to use for sending an ICMP error.
 513                 */
 514                iif = l3mdev_master_ifindex(skb->dev);
 515        }
 516
 517        /*
 518         *      Must not send error if the source does not uniquely
 519         *      identify a single node (RFC2463 Section 2.4).
 520         *      We check unspecified / multicast addresses here,
 521         *      and anycast addresses will be checked later.
 522         */
 523        if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
 524                net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
 525                                    &hdr->saddr, &hdr->daddr);
 526                return;
 527        }
 528
 529        /*
 530         *      Never answer to a ICMP packet.
 531         */
 532        if (is_ineligible(skb)) {
 533                net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
 534                                    &hdr->saddr, &hdr->daddr);
 535                return;
 536        }
 537
 538        /* Needed by both icmp_global_allow and icmpv6_xmit_lock */
 539        local_bh_disable();
 540
 541        /* Check global sysctl_icmp_msgs_per_sec ratelimit */
 542        if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
 543                goto out_bh_enable;
 544
 545        mip6_addr_swap(skb);
 546
 547        sk = icmpv6_xmit_lock(net);
 548        if (!sk)
 549                goto out_bh_enable;
 550
 551        memset(&fl6, 0, sizeof(fl6));
 552        fl6.flowi6_proto = IPPROTO_ICMPV6;
 553        fl6.daddr = hdr->saddr;
 554        if (force_saddr)
 555                saddr = force_saddr;
 556        if (saddr) {
 557                fl6.saddr = *saddr;
 558        } else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
 559                /* select a more meaningful saddr from input if */
 560                struct net_device *in_netdev;
 561
 562                in_netdev = dev_get_by_index(net, IP6CB(skb)->iif);
 563                if (in_netdev) {
 564                        ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
 565                                           inet6_sk(sk)->srcprefs,
 566                                           &fl6.saddr);
 567                        dev_put(in_netdev);
 568                }
 569        }
 570        fl6.flowi6_mark = mark;
 571        fl6.flowi6_oif = iif;
 572        fl6.fl6_icmp_type = type;
 573        fl6.fl6_icmp_code = code;
 574        fl6.flowi6_uid = sock_net_uid(net, NULL);
 575        fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
 576        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
 577
 578        np = inet6_sk(sk);
 579
 580        if (!icmpv6_xrlim_allow(sk, type, &fl6))
 581                goto out;
 582
 583        tmp_hdr.icmp6_type = type;
 584        tmp_hdr.icmp6_code = code;
 585        tmp_hdr.icmp6_cksum = 0;
 586        tmp_hdr.icmp6_pointer = htonl(info);
 587
 588        if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
 589                fl6.flowi6_oif = np->mcast_oif;
 590        else if (!fl6.flowi6_oif)
 591                fl6.flowi6_oif = np->ucast_oif;
 592
 593        ipcm6_init_sk(&ipc6, np);
 594        ipc6.sockc.mark = mark;
 595        fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 596
 597        dst = icmpv6_route_lookup(net, skb, sk, &fl6);
 598        if (IS_ERR(dst))
 599                goto out;
 600
 601        ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 602
 603        msg.skb = skb;
 604        msg.offset = skb_network_offset(skb);
 605        msg.type = type;
 606
 607        len = skb->len - msg.offset;
 608        len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
 609        if (len < 0) {
 610                net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
 611                                    &hdr->saddr, &hdr->daddr);
 612                goto out_dst_release;
 613        }
 614
 615        rcu_read_lock();
 616        idev = __in6_dev_get(skb->dev);
 617
 618        if (ip6_append_data(sk, icmpv6_getfrag, &msg,
 619                            len + sizeof(struct icmp6hdr),
 620                            sizeof(struct icmp6hdr),
 621                            &ipc6, &fl6, (struct rt6_info *)dst,
 622                            MSG_DONTWAIT)) {
 623                ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 624                ip6_flush_pending_frames(sk);
 625        } else {
 626                icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
 627                                           len + sizeof(struct icmp6hdr));
 628        }
 629        rcu_read_unlock();
 630out_dst_release:
 631        dst_release(dst);
 632out:
 633        icmpv6_xmit_unlock(sk);
 634out_bh_enable:
 635        local_bh_enable();
 636}
 637EXPORT_SYMBOL(icmp6_send);
 638
 639/* Slightly more convenient version of icmp6_send.
 640 */
 641void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
 642{
 643        icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
 644        kfree_skb(skb);
 645}
 646
 647/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 648 * if sufficient data bytes are available
 649 * @nhs is the size of the tunnel header(s) :
 650 *  Either an IPv4 header for SIT encap
 651 *         an IPv4 header + GRE header for GRE encap
 652 */
 653int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
 654                               unsigned int data_len)
 655{
 656        struct in6_addr temp_saddr;
 657        struct rt6_info *rt;
 658        struct sk_buff *skb2;
 659        u32 info = 0;
 660
 661        if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
 662                return 1;
 663
 664        /* RFC 4884 (partial) support for ICMP extensions */
 665        if (data_len < 128 || (data_len & 7) || skb->len < data_len)
 666                data_len = 0;
 667
 668        skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
 669
 670        if (!skb2)
 671                return 1;
 672
 673        skb_dst_drop(skb2);
 674        skb_pull(skb2, nhs);
 675        skb_reset_network_header(skb2);
 676
 677        rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
 678                        skb, 0);
 679
 680        if (rt && rt->dst.dev)
 681                skb2->dev = rt->dst.dev;
 682
 683        ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
 684
 685        if (data_len) {
 686                /* RFC 4884 (partial) support :
 687                 * insert 0 padding at the end, before the extensions
 688                 */
 689                __skb_push(skb2, nhs);
 690                skb_reset_network_header(skb2);
 691                memmove(skb2->data, skb2->data + nhs, data_len - nhs);
 692                memset(skb2->data + data_len - nhs, 0, nhs);
 693                /* RFC 4884 4.5 : Length is measured in 64-bit words,
 694                 * and stored in reserved[0]
 695                 */
 696                info = (data_len/8) << 24;
 697        }
 698        if (type == ICMP_TIME_EXCEEDED)
 699                icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
 700                           info, &temp_saddr);
 701        else
 702                icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
 703                           info, &temp_saddr);
 704        if (rt)
 705                ip6_rt_put(rt);
 706
 707        kfree_skb(skb2);
 708
 709        return 0;
 710}
 711EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
 712
 713static void icmpv6_echo_reply(struct sk_buff *skb)
 714{
 715        struct net *net = dev_net(skb->dev);
 716        struct sock *sk;
 717        struct inet6_dev *idev;
 718        struct ipv6_pinfo *np;
 719        const struct in6_addr *saddr = NULL;
 720        struct icmp6hdr *icmph = icmp6_hdr(skb);
 721        struct icmp6hdr tmp_hdr;
 722        struct flowi6 fl6;
 723        struct icmpv6_msg msg;
 724        struct dst_entry *dst;
 725        struct ipcm6_cookie ipc6;
 726        u32 mark = IP6_REPLY_MARK(net, skb->mark);
 727        bool acast;
 728
 729        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
 730            net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
 731                return;
 732
 733        saddr = &ipv6_hdr(skb)->daddr;
 734
 735        acast = ipv6_anycast_destination(skb_dst(skb), saddr);
 736        if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
 737                return;
 738
 739        if (!ipv6_unicast_destination(skb) &&
 740            !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
 741                saddr = NULL;
 742
 743        memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
 744        tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
 745
 746        memset(&fl6, 0, sizeof(fl6));
 747        if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
 748                fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
 749
 750        fl6.flowi6_proto = IPPROTO_ICMPV6;
 751        fl6.daddr = ipv6_hdr(skb)->saddr;
 752        if (saddr)
 753                fl6.saddr = *saddr;
 754        fl6.flowi6_oif = icmp6_iif(skb);
 755        fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
 756        fl6.flowi6_mark = mark;
 757        fl6.flowi6_uid = sock_net_uid(net, NULL);
 758        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
 759
 760        local_bh_disable();
 761        sk = icmpv6_xmit_lock(net);
 762        if (!sk)
 763                goto out_bh_enable;
 764        np = inet6_sk(sk);
 765
 766        if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
 767                fl6.flowi6_oif = np->mcast_oif;
 768        else if (!fl6.flowi6_oif)
 769                fl6.flowi6_oif = np->ucast_oif;
 770
 771        if (ip6_dst_lookup(net, sk, &dst, &fl6))
 772                goto out;
 773        dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
 774        if (IS_ERR(dst))
 775                goto out;
 776
 777        /* Check the ratelimit */
 778        if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
 779            !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
 780                goto out_dst_release;
 781
 782        idev = __in6_dev_get(skb->dev);
 783
 784        msg.skb = skb;
 785        msg.offset = 0;
 786        msg.type = ICMPV6_ECHO_REPLY;
 787
 788        ipcm6_init_sk(&ipc6, np);
 789        ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 790        ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 791        ipc6.sockc.mark = mark;
 792
 793        if (ip6_append_data(sk, icmpv6_getfrag, &msg,
 794                            skb->len + sizeof(struct icmp6hdr),
 795                            sizeof(struct icmp6hdr), &ipc6, &fl6,
 796                            (struct rt6_info *)dst, MSG_DONTWAIT)) {
 797                __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 798                ip6_flush_pending_frames(sk);
 799        } else {
 800                icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
 801                                           skb->len + sizeof(struct icmp6hdr));
 802        }
 803out_dst_release:
 804        dst_release(dst);
 805out:
 806        icmpv6_xmit_unlock(sk);
 807out_bh_enable:
 808        local_bh_enable();
 809}
 810
 811void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 812{
 813        const struct inet6_protocol *ipprot;
 814        int inner_offset;
 815        __be16 frag_off;
 816        u8 nexthdr;
 817        struct net *net = dev_net(skb->dev);
 818
 819        if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 820                goto out;
 821
 822        nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
 823        if (ipv6_ext_hdr(nexthdr)) {
 824                /* now skip over extension headers */
 825                inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
 826                                                &nexthdr, &frag_off);
 827                if (inner_offset < 0)
 828                        goto out;
 829        } else {
 830                inner_offset = sizeof(struct ipv6hdr);
 831        }
 832
 833        /* Checkin header including 8 bytes of inner protocol header. */
 834        if (!pskb_may_pull(skb, inner_offset+8))
 835                goto out;
 836
 837        /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
 838           Without this we will not able f.e. to make source routed
 839           pmtu discovery.
 840           Corresponding argument (opt) to notifiers is already added.
 841           --ANK (980726)
 842         */
 843
 844        ipprot = rcu_dereference(inet6_protos[nexthdr]);
 845        if (ipprot && ipprot->err_handler)
 846                ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
 847
 848        raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
 849        return;
 850
 851out:
 852        __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
 853}
 854
 855/*
 856 *      Handle icmp messages
 857 */
 858
 859static int icmpv6_rcv(struct sk_buff *skb)
 860{
 861        struct net *net = dev_net(skb->dev);
 862        struct net_device *dev = icmp6_dev(skb);
 863        struct inet6_dev *idev = __in6_dev_get(dev);
 864        const struct in6_addr *saddr, *daddr;
 865        struct icmp6hdr *hdr;
 866        u8 type;
 867        bool success = false;
 868
 869        if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 870                struct sec_path *sp = skb_sec_path(skb);
 871                int nh;
 872
 873                if (!(sp && sp->xvec[sp->len - 1]->props.flags &
 874                                 XFRM_STATE_ICMP))
 875                        goto drop_no_count;
 876
 877                if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
 878                        goto drop_no_count;
 879
 880                nh = skb_network_offset(skb);
 881                skb_set_network_header(skb, sizeof(*hdr));
 882
 883                if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
 884                        goto drop_no_count;
 885
 886                skb_set_network_header(skb, nh);
 887        }
 888
 889        __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
 890
 891        saddr = &ipv6_hdr(skb)->saddr;
 892        daddr = &ipv6_hdr(skb)->daddr;
 893
 894        if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
 895                net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
 896                                    saddr, daddr);
 897                goto csum_error;
 898        }
 899
 900        if (!pskb_pull(skb, sizeof(*hdr)))
 901                goto discard_it;
 902
 903        hdr = icmp6_hdr(skb);
 904
 905        type = hdr->icmp6_type;
 906
 907        ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
 908
 909        switch (type) {
 910        case ICMPV6_ECHO_REQUEST:
 911                if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
 912                        icmpv6_echo_reply(skb);
 913                break;
 914
 915        case ICMPV6_ECHO_REPLY:
 916                success = ping_rcv(skb);
 917                break;
 918
 919        case ICMPV6_PKT_TOOBIG:
 920                /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
 921                   standard destination cache. Seems, only "advanced"
 922                   destination cache will allow to solve this problem
 923                   --ANK (980726)
 924                 */
 925                if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 926                        goto discard_it;
 927                hdr = icmp6_hdr(skb);
 928
 929                /* to notify */
 930                fallthrough;
 931        case ICMPV6_DEST_UNREACH:
 932        case ICMPV6_TIME_EXCEED:
 933        case ICMPV6_PARAMPROB:
 934                icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
 935                break;
 936
 937        case NDISC_ROUTER_SOLICITATION:
 938        case NDISC_ROUTER_ADVERTISEMENT:
 939        case NDISC_NEIGHBOUR_SOLICITATION:
 940        case NDISC_NEIGHBOUR_ADVERTISEMENT:
 941        case NDISC_REDIRECT:
 942                ndisc_rcv(skb);
 943                break;
 944
 945        case ICMPV6_MGM_QUERY:
 946                igmp6_event_query(skb);
 947                break;
 948
 949        case ICMPV6_MGM_REPORT:
 950                igmp6_event_report(skb);
 951                break;
 952
 953        case ICMPV6_MGM_REDUCTION:
 954        case ICMPV6_NI_QUERY:
 955        case ICMPV6_NI_REPLY:
 956        case ICMPV6_MLD2_REPORT:
 957        case ICMPV6_DHAAD_REQUEST:
 958        case ICMPV6_DHAAD_REPLY:
 959        case ICMPV6_MOBILE_PREFIX_SOL:
 960        case ICMPV6_MOBILE_PREFIX_ADV:
 961                break;
 962
 963        default:
 964                /* informational */
 965                if (type & ICMPV6_INFOMSG_MASK)
 966                        break;
 967
 968                net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
 969                                    saddr, daddr);
 970
 971                /*
 972                 * error of unknown type.
 973                 * must pass to upper level
 974                 */
 975
 976                icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
 977        }
 978
 979        /* until the v6 path can be better sorted assume failure and
 980         * preserve the status quo behaviour for the rest of the paths to here
 981         */
 982        if (success)
 983                consume_skb(skb);
 984        else
 985                kfree_skb(skb);
 986
 987        return 0;
 988
 989csum_error:
 990        __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
 991discard_it:
 992        __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
 993drop_no_count:
 994        kfree_skb(skb);
 995        return 0;
 996}
 997
 998void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
 999                      u8 type,
1000                      const struct in6_addr *saddr,
1001                      const struct in6_addr *daddr,
1002                      int oif)
1003{
1004        memset(fl6, 0, sizeof(*fl6));
1005        fl6->saddr = *saddr;
1006        fl6->daddr = *daddr;
1007        fl6->flowi6_proto       = IPPROTO_ICMPV6;
1008        fl6->fl6_icmp_type      = type;
1009        fl6->fl6_icmp_code      = 0;
1010        fl6->flowi6_oif         = oif;
1011        security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1012}
1013
1014static void __net_exit icmpv6_sk_exit(struct net *net)
1015{
1016        int i;
1017
1018        for_each_possible_cpu(i)
1019                inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
1020        free_percpu(net->ipv6.icmp_sk);
1021}
1022
1023static int __net_init icmpv6_sk_init(struct net *net)
1024{
1025        struct sock *sk;
1026        int err, i;
1027
1028        net->ipv6.icmp_sk = alloc_percpu(struct sock *);
1029        if (!net->ipv6.icmp_sk)
1030                return -ENOMEM;
1031
1032        for_each_possible_cpu(i) {
1033                err = inet_ctl_sock_create(&sk, PF_INET6,
1034                                           SOCK_RAW, IPPROTO_ICMPV6, net);
1035                if (err < 0) {
1036                        pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1037                               err);
1038                        goto fail;
1039                }
1040
1041                *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
1042
1043                /* Enough space for 2 64K ICMP packets, including
1044                 * sk_buff struct overhead.
1045                 */
1046                sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1047        }
1048        return 0;
1049
1050 fail:
1051        icmpv6_sk_exit(net);
1052        return err;
1053}
1054
1055static struct pernet_operations icmpv6_sk_ops = {
1056        .init = icmpv6_sk_init,
1057        .exit = icmpv6_sk_exit,
1058};
1059
1060int __init icmpv6_init(void)
1061{
1062        int err;
1063
1064        err = register_pernet_subsys(&icmpv6_sk_ops);
1065        if (err < 0)
1066                return err;
1067
1068        err = -EAGAIN;
1069        if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1070                goto fail;
1071
1072        err = inet6_register_icmp_sender(icmp6_send);
1073        if (err)
1074                goto sender_reg_err;
1075        return 0;
1076
1077sender_reg_err:
1078        inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1079fail:
1080        pr_err("Failed to register ICMP6 protocol\n");
1081        unregister_pernet_subsys(&icmpv6_sk_ops);
1082        return err;
1083}
1084
1085void icmpv6_cleanup(void)
1086{
1087        inet6_unregister_icmp_sender(icmp6_send);
1088        unregister_pernet_subsys(&icmpv6_sk_ops);
1089        inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1090}
1091
1092
/*
 * errno value and "fatal" flag for ICMPV6_DEST_UNREACH messages.
 * icmpv6_err_convert() indexes this table directly with the ICMPv6 code,
 * so the entry order must not change.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1126
1127int icmpv6_err_convert(u8 type, u8 code, int *err)
1128{
1129        int fatal = 0;
1130
1131        *err = EPROTO;
1132
1133        switch (type) {
1134        case ICMPV6_DEST_UNREACH:
1135                fatal = 1;
1136                if (code < ARRAY_SIZE(tab_unreach)) {
1137                        *err  = tab_unreach[code].err;
1138                        fatal = tab_unreach[code].fatal;
1139                }
1140                break;
1141
1142        case ICMPV6_PKT_TOOBIG:
1143                *err = EMSGSIZE;
1144                break;
1145
1146        case ICMPV6_PARAMPROB:
1147                *err = EPROTO;
1148                fatal = 1;
1149                break;
1150
1151        case ICMPV6_TIME_EXCEED:
1152                *err = EHOSTUNREACH;
1153                break;
1154        }
1155
1156        return fatal;
1157}
1158EXPORT_SYMBOL(icmpv6_err_convert);
1159
1160#ifdef CONFIG_SYSCTL
1161static struct ctl_table ipv6_icmp_table_template[] = {
1162        {
1163                .procname       = "ratelimit",
1164                .data           = &init_net.ipv6.sysctl.icmpv6_time,
1165                .maxlen         = sizeof(int),
1166                .mode           = 0644,
1167                .proc_handler   = proc_dointvec_ms_jiffies,
1168        },
1169        {
1170                .procname       = "echo_ignore_all",
1171                .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1172                .maxlen         = sizeof(int),
1173                .mode           = 0644,
1174                .proc_handler = proc_dointvec,
1175        },
1176        {
1177                .procname       = "echo_ignore_multicast",
1178                .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1179                .maxlen         = sizeof(int),
1180                .mode           = 0644,
1181                .proc_handler = proc_dointvec,
1182        },
1183        {
1184                .procname       = "echo_ignore_anycast",
1185                .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1186                .maxlen         = sizeof(int),
1187                .mode           = 0644,
1188                .proc_handler = proc_dointvec,
1189        },
1190        {
1191                .procname       = "ratemask",
1192                .data           = &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1193                .maxlen         = ICMPV6_MSG_MAX + 1,
1194                .mode           = 0644,
1195                .proc_handler = proc_do_large_bitmap,
1196        },
1197        { },
1198};
1199
1200struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1201{
1202        struct ctl_table *table;
1203
1204        table = kmemdup(ipv6_icmp_table_template,
1205                        sizeof(ipv6_icmp_table_template),
1206                        GFP_KERNEL);
1207
1208        if (table) {
1209                table[0].data = &net->ipv6.sysctl.icmpv6_time;
1210                table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1211                table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1212                table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1213                table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1214        }
1215        return table;
1216}
1217#endif
1218