linux/net/ipv6/icmp.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *      Internet Control Message Protocol (ICMPv6)
   4 *      Linux INET6 implementation
   5 *
   6 *      Authors:
   7 *      Pedro Roque             <roque@di.fc.ul.pt>
   8 *
   9 *      Based on net/ipv4/icmp.c
  10 *
  11 *      RFC 1885
  12 */
  13
  14/*
  15 *      Changes:
  16 *
  17 *      Andi Kleen              :       exception handling
 *      Andi Kleen                      add rate limits. never reply to an icmp.
  19 *                                      add more length checks and other fixes.
 *      yoshfuji                :       ensure to send parameter problem for
  21 *                                      fragments.
  22 *      YOSHIFUJI Hideaki @USAGI:       added sysctl for icmp rate limit.
  23 *      Randy Dunlap and
  24 *      YOSHIFUJI Hideaki @USAGI:       Per-interface statistics support
  25 *      Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
  26 */
  27
  28#define pr_fmt(fmt) "IPv6: " fmt
  29
  30#include <linux/module.h>
  31#include <linux/errno.h>
  32#include <linux/types.h>
  33#include <linux/socket.h>
  34#include <linux/in.h>
  35#include <linux/kernel.h>
  36#include <linux/sockios.h>
  37#include <linux/net.h>
  38#include <linux/skbuff.h>
  39#include <linux/init.h>
  40#include <linux/netfilter.h>
  41#include <linux/slab.h>
  42
  43#ifdef CONFIG_SYSCTL
  44#include <linux/sysctl.h>
  45#endif
  46
  47#include <linux/inet.h>
  48#include <linux/netdevice.h>
  49#include <linux/icmpv6.h>
  50
  51#include <net/ip.h>
  52#include <net/sock.h>
  53
  54#include <net/ipv6.h>
  55#include <net/ip6_checksum.h>
  56#include <net/ping.h>
  57#include <net/protocol.h>
  58#include <net/raw.h>
  59#include <net/rawv6.h>
  60#include <net/seg6.h>
  61#include <net/transp_v6.h>
  62#include <net/ip6_route.h>
  63#include <net/addrconf.h>
  64#include <net/icmp.h>
  65#include <net/xfrm.h>
  66#include <net/inet_common.h>
  67#include <net/dsfield.h>
  68#include <net/l3mdev.h>
  69
  70#include <linux/uaccess.h>
  71
/* Per-CPU socket pointer; icmpv6_xmit_lock() reads it with this_cpu_read()
 * and hands the socket to the ICMPv6 transmit paths in this file.
 */
static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
  73
  74static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
  75                       u8 type, u8 code, int offset, __be32 info)
  76{
  77        /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
  78        struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
  79        struct net *net = dev_net(skb->dev);
  80
  81        if (type == ICMPV6_PKT_TOOBIG)
  82                ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
  83        else if (type == NDISC_REDIRECT)
  84                ip6_redirect(skb, net, skb->dev->ifindex, 0,
  85                             sock_net_uid(net, NULL));
  86
  87        if (!(type & ICMPV6_INFOMSG_MASK))
  88                if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
  89                        ping_err(skb, offset, ntohl(info));
  90
  91        return 0;
  92}
  93
/* Defined further down; declared here so icmpv6_protocol can reference it. */
static int icmpv6_rcv(struct sk_buff *skb);

/* Protocol descriptor for ICMPv6: icmpv6_rcv() handles received messages,
 * icmpv6_err() handles errors that embed an ICMPv6 packet.
 */
static const struct inet6_protocol icmpv6_protocol = {
        .handler        =       icmpv6_rcv,
        .err_handler    =       icmpv6_err,
        /* NOTE(review): NOPOLICY presumably defers the xfrm policy check to
         * icmpv6_rcv(), which calls xfrm6_policy_check() itself - confirm.
         */
        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
 101
 102/* Called with BH disabled */
 103static struct sock *icmpv6_xmit_lock(struct net *net)
 104{
 105        struct sock *sk;
 106
 107        sk = this_cpu_read(ipv6_icmp_sk);
 108        if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
 109                /* This can happen if the output path (f.e. SIT or
 110                 * ip6ip6 tunnel) signals dst_link_failure() for an
 111                 * outgoing ICMP6 packet.
 112                 */
 113                return NULL;
 114        }
 115        sock_net_set(sk, net);
 116        return sk;
 117}
 118
/*
 * Counterpart of icmpv6_xmit_lock(): point the socket back at init_net
 * and then drop sk->sk_lock.slock.
 */
static void icmpv6_xmit_unlock(struct sock *sk)
{
        /* Reset the netns while the lock is still held. */
        sock_net_set(sk, &init_net);
        spin_unlock(&sk->sk_lock.slock);
}
 124
 125/*
 126 * Figure out, may we reply to this packet with icmp error.
 127 *
 128 * We do not reply, if:
 129 *      - it was icmp error message.
 130 *      - it is truncated, so that it is known, that protocol is ICMPV6
 131 *        (i.e. in the middle of some exthdr)
 132 *
 133 *      --ANK (980726)
 134 */
 135
 136static bool is_ineligible(const struct sk_buff *skb)
 137{
 138        int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
 139        int len = skb->len - ptr;
 140        __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 141        __be16 frag_off;
 142
 143        if (len < 0)
 144                return true;
 145
 146        ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
 147        if (ptr < 0)
 148                return false;
 149        if (nexthdr == IPPROTO_ICMPV6) {
 150                u8 _type, *tp;
 151                tp = skb_header_pointer(skb,
 152                        ptr+offsetof(struct icmp6hdr, icmp6_type),
 153                        sizeof(_type), &_type);
 154
 155                /* Based on RFC 8200, Section 4.5 Fragment Header, return
 156                 * false if this is a fragment packet with no icmp header info.
 157                 */
 158                if (!tp && frag_off != 0)
 159                        return false;
 160                else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
 161                        return true;
 162        }
 163        return false;
 164}
 165
 166static bool icmpv6_mask_allow(struct net *net, int type)
 167{
 168        if (type > ICMPV6_MSG_MAX)
 169                return true;
 170
 171        /* Limit if icmp type is set in ratemask. */
 172        if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
 173                return true;
 174
 175        return false;
 176}
 177
 178static bool icmpv6_global_allow(struct net *net, int type)
 179{
 180        if (icmpv6_mask_allow(net, type))
 181                return true;
 182
 183        if (icmp_global_allow())
 184                return true;
 185
 186        return false;
 187}
 188
 189/*
 190 * Check the ICMP output rate limit
 191 */
 192static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
 193                               struct flowi6 *fl6)
 194{
 195        struct net *net = sock_net(sk);
 196        struct dst_entry *dst;
 197        bool res = false;
 198
 199        if (icmpv6_mask_allow(net, type))
 200                return true;
 201
 202        /*
 203         * Look up the output route.
 204         * XXX: perhaps the expire for routing entries cloned by
 205         * this lookup should be more aggressive (not longer than timeout).
 206         */
 207        dst = ip6_route_output(net, sk, fl6);
 208        if (dst->error) {
 209                IP6_INC_STATS(net, ip6_dst_idev(dst),
 210                              IPSTATS_MIB_OUTNOROUTES);
 211        } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
 212                res = true;
 213        } else {
 214                struct rt6_info *rt = (struct rt6_info *)dst;
 215                int tmo = net->ipv6.sysctl.icmpv6_time;
 216                struct inet_peer *peer;
 217
 218                /* Give more bandwidth to wider prefixes. */
 219                if (rt->rt6i_dst.plen < 128)
 220                        tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 221
 222                peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
 223                res = inet_peer_xrlim_allow(peer, tmo);
 224                if (peer)
 225                        inet_putpeer(peer);
 226        }
 227        dst_release(dst);
 228        return res;
 229}
 230
 231static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
 232                                  struct flowi6 *fl6)
 233{
 234        struct net *net = sock_net(sk);
 235        struct dst_entry *dst;
 236        bool res = false;
 237
 238        dst = ip6_route_output(net, sk, fl6);
 239        if (!dst->error) {
 240                struct rt6_info *rt = (struct rt6_info *)dst;
 241                struct in6_addr prefsrc;
 242
 243                rt6_get_prefsrc(rt, &prefsrc);
 244                res = !ipv6_addr_any(&prefsrc);
 245        }
 246        dst_release(dst);
 247        return res;
 248}
 249
 250/*
 251 *      an inline helper for the "simple" if statement below
 252 *      checks if parameter problem report is caused by an
 253 *      unrecognized IPv6 option that has the Option Type
 254 *      highest-order two bits set to 10
 255 */
 256
 257static bool opt_unrec(struct sk_buff *skb, __u32 offset)
 258{
 259        u8 _optval, *op;
 260
 261        offset += skb_network_offset(skb);
 262        op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
 263        if (!op)
 264                return true;
 265        return (*op & 0xC0) == 0x80;
 266}
 267
 268void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
 269                                struct icmp6hdr *thdr, int len)
 270{
 271        struct sk_buff *skb;
 272        struct icmp6hdr *icmp6h;
 273
 274        skb = skb_peek(&sk->sk_write_queue);
 275        if (!skb)
 276                return;
 277
 278        icmp6h = icmp6_hdr(skb);
 279        memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
 280        icmp6h->icmp6_cksum = 0;
 281
 282        if (skb_queue_len(&sk->sk_write_queue) == 1) {
 283                skb->csum = csum_partial(icmp6h,
 284                                        sizeof(struct icmp6hdr), skb->csum);
 285                icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
 286                                                      &fl6->daddr,
 287                                                      len, fl6->flowi6_proto,
 288                                                      skb->csum);
 289        } else {
 290                __wsum tmp_csum = 0;
 291
 292                skb_queue_walk(&sk->sk_write_queue, skb) {
 293                        tmp_csum = csum_add(tmp_csum, skb->csum);
 294                }
 295
 296                tmp_csum = csum_partial(icmp6h,
 297                                        sizeof(struct icmp6hdr), tmp_csum);
 298                icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
 299                                                      &fl6->daddr,
 300                                                      len, fl6->flowi6_proto,
 301                                                      tmp_csum);
 302        }
 303        ip6_push_pending_frames(sk);
 304}
 305
/* Context handed to icmpv6_getfrag() through ip6_append_data(). */
struct icmpv6_msg {
        struct sk_buff  *skb;           /* packet whose bytes are copied into the reply */
        int             offset;         /* start offset of the copy inside skb */
        uint8_t         type;           /* ICMPv6 type of the message being built */
};
 311
 312static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
 313{
 314        struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
 315        struct sk_buff *org_skb = msg->skb;
 316        __wsum csum;
 317
 318        csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
 319                                      to, len);
 320        skb->csum = csum_block_add(skb->csum, csum, odd);
 321        if (!(msg->type & ICMPV6_INFOMSG_MASK))
 322                nf_ct_attach(skb, org_skb);
 323        return 0;
 324}
 325
 326#if IS_ENABLED(CONFIG_IPV6_MIP6)
 327static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
 328{
 329        struct ipv6hdr *iph = ipv6_hdr(skb);
 330        struct ipv6_destopt_hao *hao;
 331        struct in6_addr tmp;
 332        int off;
 333
 334        if (opt->dsthao) {
 335                off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
 336                if (likely(off >= 0)) {
 337                        hao = (struct ipv6_destopt_hao *)
 338                                        (skb_network_header(skb) + off);
 339                        tmp = iph->saddr;
 340                        iph->saddr = hao->addr;
 341                        hao->addr = tmp;
 342                }
 343        }
 344}
 345#else
 346static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
 347#endif
 348
/*
 * Find the route (dst) an ICMPv6 message described by @fl6 is sent on.
 *
 * A plain lookup on @fl6 is tried first and passed through xfrm_lookup().
 * If that yields a transformed dst (different from the plain one) it is
 * returned directly. Otherwise a second lookup is attempted on the flow
 * obtained by reverse-decoding @skb, using XFRM_LOOKUP_ICMP.
 *
 * Returns a dst on success or an ERR_PTR() value; -EINVAL is returned
 * when the destination is a known anycast address.
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
                                             struct sk_buff *skb,
                                             struct sock *sk,
                                             struct flowi6 *fl6)
{
        struct dst_entry *dst, *dst2;
        struct flowi6 fl2;
        int err;

        err = ip6_dst_lookup(net, sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);

        /*
         * We won't send icmp if the destination is known
         * anycast.
         */
        if (ipv6_anycast_destination(dst, &fl6->daddr)) {
                net_dbg_ratelimited("icmp6_send: acast source\n");
                dst_release(dst);
                return ERR_PTR(-EINVAL);
        }

        /* No need to clone since we're just using its address. */
        dst2 = dst;

        dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
        if (!IS_ERR(dst)) {
                /* A different dst came back from xfrm_lookup(): use it. */
                if (dst != dst2)
                        return dst;
        } else {
                /* -EPERM: continue with the reverse lookup and no fallback
                 * dst; any other error is returned to the caller.
                 */
                if (PTR_ERR(dst) == -EPERM)
                        dst = NULL;
                else
                        return dst;
        }

        /* Build the flow of the reverse direction from the packet itself. */
        err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
        if (err)
                goto relookup_failed;

        err = ip6_dst_lookup(net, sk, &dst2, &fl2);
        if (err)
                goto relookup_failed;

        dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
        if (!IS_ERR(dst2)) {
                /* The second lookup succeeded: it replaces the first dst. */
                dst_release(dst);
                dst = dst2;
        } else {
                err = PTR_ERR(dst2);
                if (err == -EPERM) {
                        dst_release(dst);
                        return dst2;
                } else
                        goto relookup_failed;
        }

        /* Reached on success too: dst then holds the result to return. */
relookup_failed:
        if (dst)
                return dst;
        return ERR_PTR(err);
}
 412
 413static struct net_device *icmp6_dev(const struct sk_buff *skb)
 414{
 415        struct net_device *dev = skb->dev;
 416
 417        /* for local traffic to local address, skb dev is the loopback
 418         * device. Check if there is a dst attached to the skb and if so
 419         * get the real device index. Same is needed for replies to a link
 420         * local address on a device enslaved to an L3 master device
 421         */
 422        if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
 423                const struct rt6_info *rt6 = skb_rt6_info(skb);
 424
 425                if (rt6)
 426                        dev = rt6->rt6i_idev->dev;
 427        }
 428
 429        return dev;
 430}
 431
 432static int icmp6_iif(const struct sk_buff *skb)
 433{
 434        return icmp6_dev(skb)->ifindex;
 435}
 436
/*
 *      Send an ICMP message in response to a packet in error
 *
 *      @skb:         the offending packet; its IPv6 header supplies the
 *                    addresses and up to IPV6_MIN_MTU worth of it is
 *                    quoted in the reply
 *      @type, @code: ICMPv6 type and code of the message to send
 *      @info:        value stored (in network byte order) in the
 *                    icmp6_pointer field of the new header
 *      @force_saddr: if non-NULL, source address to use for the reply
 *      @parm:        control block data of @skb; passed to
 *                    mip6_addr_swap() and read for parm->iif when a
 *                    source address has to be selected
 *
 *      Nothing is sent when the packet fails the sanity, address,
 *      eligibility or rate-limit checks below, or when no route is found.
 */
void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
                const struct in6_addr *force_saddr,
                const struct inet6_skb_parm *parm)
{
        struct inet6_dev *idev = NULL;
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct sock *sk;
        struct net *net;
        struct ipv6_pinfo *np;
        const struct in6_addr *saddr = NULL;
        struct dst_entry *dst;
        struct icmp6hdr tmp_hdr;
        struct flowi6 fl6;
        struct icmpv6_msg msg;
        struct ipcm6_cookie ipc6;
        int iif = 0;
        int addr_type = 0;
        int len;
        u32 mark;

        /* The whole IPv6 header must lie inside the buffer's data area. */
        if ((u8 *)hdr < skb->head ||
            (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
                return;

        if (!skb->dev)
                return;
        net = dev_net(skb->dev);
        mark = IP6_REPLY_MARK(net, skb->mark);
        /*
         *      Make sure we respect the rules
         *      i.e. RFC 1885 2.4(e)
         *      Rule (e.1) is enforced by not using icmp6_send
         *      in any code that processes icmp errors.
         */
        addr_type = ipv6_addr_type(&hdr->daddr);

        /* If the packet was addressed to one of our own (or anycast)
         * addresses, reply from that address.
         */
        if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
            ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
                saddr = &hdr->daddr;

        /*
         *      Dest addr check
         */

        /* Packets to a multicast destination, or not addressed to this host
         * at link level, are only answered with Packet Too Big or with a
         * Parameter Problem for an unrecognized option (see opt_unrec()).
         */
        if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
                if (type != ICMPV6_PKT_TOOBIG &&
                    !(type == ICMPV6_PARAMPROB &&
                      code == ICMPV6_UNK_OPTION &&
                      (opt_unrec(skb, info))))
                        return;

                saddr = NULL;
        }

        addr_type = ipv6_addr_type(&hdr->saddr);

        /*
         *      Source addr check
         */

        if (__ipv6_addr_needs_scope_id(addr_type)) {
                iif = icmp6_iif(skb);
        } else {
                /*
                 * The source device is used for looking up which routing table
                 * to use for sending an ICMP error.
                 */
                iif = l3mdev_master_ifindex(skb->dev);
        }

        /*
         *      Must not send error if the source does not uniquely
         *      identify a single node (RFC2463 Section 2.4).
         *      We check unspecified / multicast addresses here,
         *      and anycast addresses will be checked later.
         */
        if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
                net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
                                    &hdr->saddr, &hdr->daddr);
                return;
        }

        /*
         *      Never answer an ICMP error packet (see is_ineligible()).
         */
        if (is_ineligible(skb)) {
                net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
                                    &hdr->saddr, &hdr->daddr);
                return;
        }

        /* Needed by both icmp_global_allow and icmpv6_xmit_lock */
        local_bh_disable();

        /* Check global sysctl_icmp_msgs_per_sec ratelimit */
        if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
                goto out_bh_enable;

        mip6_addr_swap(skb, parm);

        sk = icmpv6_xmit_lock(net);
        if (!sk)
                goto out_bh_enable;

        /* Describe the reply flow: back to the sender of the packet. */
        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_proto = IPPROTO_ICMPV6;
        fl6.daddr = hdr->saddr;
        if (force_saddr)
                saddr = force_saddr;
        if (saddr) {
                fl6.saddr = *saddr;
        } else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
                /* The route gives no preferred source: select a more
                 * meaningful saddr from the input interface, if it exists.
                 */
                struct net_device *in_netdev;

                in_netdev = dev_get_by_index(net, parm->iif);
                if (in_netdev) {
                        ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
                                           inet6_sk(sk)->srcprefs,
                                           &fl6.saddr);
                        dev_put(in_netdev);
                }
        }
        fl6.flowi6_mark = mark;
        fl6.flowi6_oif = iif;
        fl6.fl6_icmp_type = type;
        fl6.fl6_icmp_code = code;
        fl6.flowi6_uid = sock_net_uid(net, NULL);
        fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

        np = inet6_sk(sk);

        /* Per-destination rate limit. */
        if (!icmpv6_xrlim_allow(sk, type, &fl6))
                goto out;

        tmp_hdr.icmp6_type = type;
        tmp_hdr.icmp6_code = code;
        tmp_hdr.icmp6_cksum = 0;
        tmp_hdr.icmp6_pointer = htonl(info);

        /* No output interface chosen yet: fall back to the socket's. */
        if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
                fl6.flowi6_oif = np->mcast_oif;
        else if (!fl6.flowi6_oif)
                fl6.flowi6_oif = np->ucast_oif;

        ipcm6_init_sk(&ipc6, np);
        ipc6.sockc.mark = mark;
        fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);

        dst = icmpv6_route_lookup(net, skb, sk, &fl6);
        if (IS_ERR(dst))
                goto out;

        ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);

        /* Quote the offending packet starting at its network header. */
        msg.skb = skb;
        msg.offset = skb_network_offset(skb);
        msg.type = type;

        /* Cap the quoted data so the reply fits in IPV6_MIN_MTU.
         * NOTE(review): the clamp is done as unsigned int, so a negative
         * len would already have been converted before the test below -
         * confirm whether this check can still trigger.
         */
        len = skb->len - msg.offset;
        len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
        if (len < 0) {
                net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
                                    &hdr->saddr, &hdr->daddr);
                goto out_dst_release;
        }

        rcu_read_lock();
        idev = __in6_dev_get(skb->dev);

        /* Queue the payload; on success add the header/checksum and send,
         * on failure count the error and drop what was queued.
         */
        if (ip6_append_data(sk, icmpv6_getfrag, &msg,
                            len + sizeof(struct icmp6hdr),
                            sizeof(struct icmp6hdr),
                            &ipc6, &fl6, (struct rt6_info *)dst,
                            MSG_DONTWAIT)) {
                ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
                ip6_flush_pending_frames(sk);
        } else {
                icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
                                           len + sizeof(struct icmp6hdr));
        }
        rcu_read_unlock();
out_dst_release:
        dst_release(dst);
out:
        icmpv6_xmit_unlock(sk);
out_bh_enable:
        local_bh_enable();
}
EXPORT_SYMBOL(icmp6_send);
 631
 632/* Slightly more convenient version of icmp6_send with drop reasons.
 633 */
 634void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
 635                              enum skb_drop_reason reason)
 636{
 637        icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
 638        kfree_skb_reason(skb, reason);
 639}
 640
/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * (or ICMPV6_TIME_EXCEED/ICMPV6_EXC_HOPLIMIT when @type is
 * ICMP_TIME_EXCEEDED) if sufficient data bytes are available
 * @nhs is the size of the tunnel header(s) :
 *  Either an IPv4 header for SIT encap
 *         an IPv4 header + GRE header for GRE encap
 * @data_len is the length used for RFC 4884 handling; it is ignored
 *  (treated as 0) unless it is at least 128, a multiple of 8 and not
 *  larger than skb->len.
 *
 * Returns 1 when the headers cannot be pulled or the skb cannot be
 * duplicated, 0 otherwise. @skb itself is not freed here.
 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
                               unsigned int data_len)
{
        struct in6_addr temp_saddr;
        struct rt6_info *rt;
        struct sk_buff *skb2;
        u32 info = 0;

        /* Need the tunnel header(s), the inner IPv6 header and 8 more bytes. */
        if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
                return 1;

        /* RFC 4884 (partial) support for ICMP extensions */
        if (data_len < 128 || (data_len & 7) || skb->len < data_len)
                data_len = 0;

        /* The data is rewritten below when data_len is set, so a full copy
         * is taken in that case; otherwise a clone is used.
         */
        skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

        if (!skb2)
                return 1;

        /* Make skb2 start at the inner IPv6 header. */
        skb_dst_drop(skb2);
        skb_pull(skb2, nhs);
        skb_reset_network_header(skb2);

        rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
                        skb, 0);

        if (rt && rt->dst.dev)
                skb2->dev = rt->dst.dev;

        /* The ICMPv6 source is the v4-mapped form of the outer IPv4 source. */
        ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

        if (data_len) {
                /* RFC 4884 (partial) support :
                 * insert 0 padding at the end, before the extensions
                 */
                __skb_push(skb2, nhs);
                skb_reset_network_header(skb2);
                memmove(skb2->data, skb2->data + nhs, data_len - nhs);
                memset(skb2->data + data_len - nhs, 0, nhs);
                /* RFC 4884 4.5 : Length is measured in 64-bit words,
                 * and stored in reserved[0]
                 */
                info = (data_len/8) << 24;
        }
        if (type == ICMP_TIME_EXCEEDED)
                icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
                           info, &temp_saddr, IP6CB(skb2));
        else
                icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
                           info, &temp_saddr, IP6CB(skb2));
        if (rt)
                ip6_rt_put(rt);

        kfree_skb(skb2);

        return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
 706
/*
 * Answer the echo request (or extended echo request) held in @skb.
 *
 * The reply reuses the request's ICMPv6 header with the type changed to
 * the matching reply type and carries the request's data (skb->len
 * bytes from offset 0 of @skb). No reply is sent when the sysctls ask to
 * ignore multicast or anycast requests, when the transmit socket is
 * busy, when no route is found, or when a rate limit is hit.
 */
static void icmpv6_echo_reply(struct sk_buff *skb)
{
        struct net *net = dev_net(skb->dev);
        struct sock *sk;
        struct inet6_dev *idev;
        struct ipv6_pinfo *np;
        const struct in6_addr *saddr = NULL;
        struct icmp6hdr *icmph = icmp6_hdr(skb);
        struct icmp6hdr tmp_hdr;
        struct flowi6 fl6;
        struct icmpv6_msg msg;
        struct dst_entry *dst;
        struct ipcm6_cookie ipc6;
        u32 mark = IP6_REPLY_MARK(net, skb->mark);
        bool acast;
        u8 type;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
            net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
                return;

        /* By default reply from the address the request was sent to. */
        saddr = &ipv6_hdr(skb)->daddr;

        acast = ipv6_anycast_destination(skb_dst(skb), saddr);
        if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
                return;

        /* For a non-unicast destination leave the source unset, unless it
         * is anycast and anycast_src_echo_reply allows replying from it.
         */
        if (!ipv6_unicast_destination(skb) &&
            !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
                saddr = NULL;

        if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
                type = ICMPV6_EXT_ECHO_REPLY;
        else
                type = ICMPV6_ECHO_REPLY;

        /* Start from the request header; only the type is changed here. */
        memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
        tmp_hdr.icmp6_type = type;

        memset(&fl6, 0, sizeof(fl6));
        if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
                fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));

        fl6.flowi6_proto = IPPROTO_ICMPV6;
        fl6.daddr = ipv6_hdr(skb)->saddr;
        if (saddr)
                fl6.saddr = *saddr;
        fl6.flowi6_oif = icmp6_iif(skb);
        fl6.fl6_icmp_type = type;
        fl6.flowi6_mark = mark;
        fl6.flowi6_uid = sock_net_uid(net, NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

        /* icmpv6_xmit_lock() must be called with BH disabled. */
        local_bh_disable();
        sk = icmpv6_xmit_lock(net);
        if (!sk)
                goto out_bh_enable;
        np = inet6_sk(sk);

        /* No output interface chosen yet: fall back to the socket's. */
        if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
                fl6.flowi6_oif = np->mcast_oif;
        else if (!fl6.flowi6_oif)
                fl6.flowi6_oif = np->ucast_oif;

        if (ip6_dst_lookup(net, sk, &dst, &fl6))
                goto out;
        dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
        if (IS_ERR(dst))
                goto out;

        /* Check the ratelimit (both checks use ICMPV6_ECHO_REPLY, also for
         * extended echo replies)
         */
        if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
            !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
                goto out_dst_release;

        idev = __in6_dev_get(skb->dev);

        msg.skb = skb;
        msg.offset = 0;
        msg.type = type;

        ipcm6_init_sk(&ipc6, np);
        ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
        /* Reflect the traffic class of the request. */
        ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
        ipc6.sockc.mark = mark;

        /* Extended echo: let icmp_build_probe() fill in the reply header;
         * give up if it reports failure.
         */
        if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
                if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
                        goto out_dst_release;

        /* Queue the payload; on success add the header/checksum and send,
         * on failure count the error and drop what was queued.
         */
        if (ip6_append_data(sk, icmpv6_getfrag, &msg,
                            skb->len + sizeof(struct icmp6hdr),
                            sizeof(struct icmp6hdr), &ipc6, &fl6,
                            (struct rt6_info *)dst, MSG_DONTWAIT)) {
                __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
                ip6_flush_pending_frames(sk);
        } else {
                icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
                                           skb->len + sizeof(struct icmp6hdr));
        }
out_dst_release:
        dst_release(dst);
out:
        icmpv6_xmit_unlock(sk);
out_bh_enable:
        local_bh_enable();
}
 814
 815void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 816{
 817        struct inet6_skb_parm *opt = IP6CB(skb);
 818        const struct inet6_protocol *ipprot;
 819        int inner_offset;
 820        __be16 frag_off;
 821        u8 nexthdr;
 822        struct net *net = dev_net(skb->dev);
 823
 824        if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 825                goto out;
 826
 827        seg6_icmp_srh(skb, opt);
 828
 829        nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
 830        if (ipv6_ext_hdr(nexthdr)) {
 831                /* now skip over extension headers */
 832                inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
 833                                                &nexthdr, &frag_off);
 834                if (inner_offset < 0)
 835                        goto out;
 836        } else {
 837                inner_offset = sizeof(struct ipv6hdr);
 838        }
 839
 840        /* Checkin header including 8 bytes of inner protocol header. */
 841        if (!pskb_may_pull(skb, inner_offset+8))
 842                goto out;
 843
 844        /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
 845           Without this we will not able f.e. to make source routed
 846           pmtu discovery.
 847           Corresponding argument (opt) to notifiers is already added.
 848           --ANK (980726)
 849         */
 850
 851        ipprot = rcu_dereference(inet6_protos[nexthdr]);
 852        if (ipprot && ipprot->err_handler)
 853                ipprot->err_handler(skb, opt, type, code, inner_offset, info);
 854
 855        raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
 856        return;
 857
 858out:
 859        __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
 860}
 861
 862/*
 863 *      Handle icmp messages
 864 */
 865
 866static int icmpv6_rcv(struct sk_buff *skb)
 867{
 868        enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
 869        struct net *net = dev_net(skb->dev);
 870        struct net_device *dev = icmp6_dev(skb);
 871        struct inet6_dev *idev = __in6_dev_get(dev);
 872        const struct in6_addr *saddr, *daddr;
 873        struct icmp6hdr *hdr;
 874        u8 type;
 875
 876        if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 877                struct sec_path *sp = skb_sec_path(skb);
 878                int nh;
 879
 880                if (!(sp && sp->xvec[sp->len - 1]->props.flags &
 881                                 XFRM_STATE_ICMP)) {
 882                        reason = SKB_DROP_REASON_XFRM_POLICY;
 883                        goto drop_no_count;
 884                }
 885
 886                if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
 887                        goto drop_no_count;
 888
 889                nh = skb_network_offset(skb);
 890                skb_set_network_header(skb, sizeof(*hdr));
 891
 892                if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
 893                                                skb)) {
 894                        reason = SKB_DROP_REASON_XFRM_POLICY;
 895                        goto drop_no_count;
 896                }
 897
 898                skb_set_network_header(skb, nh);
 899        }
 900
 901        __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
 902
 903        saddr = &ipv6_hdr(skb)->saddr;
 904        daddr = &ipv6_hdr(skb)->daddr;
 905
 906        if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
 907                net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
 908                                    saddr, daddr);
 909                goto csum_error;
 910        }
 911
 912        if (!pskb_pull(skb, sizeof(*hdr)))
 913                goto discard_it;
 914
 915        hdr = icmp6_hdr(skb);
 916
 917        type = hdr->icmp6_type;
 918
 919        ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
 920
 921        switch (type) {
 922        case ICMPV6_ECHO_REQUEST:
 923                if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
 924                        icmpv6_echo_reply(skb);
 925                break;
 926        case ICMPV6_EXT_ECHO_REQUEST:
 927                if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
 928                    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
 929                        icmpv6_echo_reply(skb);
 930                break;
 931
 932        case ICMPV6_ECHO_REPLY:
 933                reason = ping_rcv(skb);
 934                break;
 935
 936        case ICMPV6_EXT_ECHO_REPLY:
 937                reason = ping_rcv(skb);
 938                break;
 939
 940        case ICMPV6_PKT_TOOBIG:
 941                /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
 942                   standard destination cache. Seems, only "advanced"
 943                   destination cache will allow to solve this problem
 944                   --ANK (980726)
 945                 */
 946                if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 947                        goto discard_it;
 948                hdr = icmp6_hdr(skb);
 949
 950                /* to notify */
 951                fallthrough;
 952        case ICMPV6_DEST_UNREACH:
 953        case ICMPV6_TIME_EXCEED:
 954        case ICMPV6_PARAMPROB:
 955                icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
 956                break;
 957
 958        case NDISC_ROUTER_SOLICITATION:
 959        case NDISC_ROUTER_ADVERTISEMENT:
 960        case NDISC_NEIGHBOUR_SOLICITATION:
 961        case NDISC_NEIGHBOUR_ADVERTISEMENT:
 962        case NDISC_REDIRECT:
 963                ndisc_rcv(skb);
 964                break;
 965
 966        case ICMPV6_MGM_QUERY:
 967                igmp6_event_query(skb);
 968                return 0;
 969
 970        case ICMPV6_MGM_REPORT:
 971                igmp6_event_report(skb);
 972                return 0;
 973
 974        case ICMPV6_MGM_REDUCTION:
 975        case ICMPV6_NI_QUERY:
 976        case ICMPV6_NI_REPLY:
 977        case ICMPV6_MLD2_REPORT:
 978        case ICMPV6_DHAAD_REQUEST:
 979        case ICMPV6_DHAAD_REPLY:
 980        case ICMPV6_MOBILE_PREFIX_SOL:
 981        case ICMPV6_MOBILE_PREFIX_ADV:
 982                break;
 983
 984        default:
 985                /* informational */
 986                if (type & ICMPV6_INFOMSG_MASK)
 987                        break;
 988
 989                net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
 990                                    saddr, daddr);
 991
 992                /*
 993                 * error of unknown type.
 994                 * must pass to upper level
 995                 */
 996
 997                icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
 998        }
 999
1000        /* until the v6 path can be better sorted assume failure and
1001         * preserve the status quo behaviour for the rest of the paths to here
1002         */
1003        if (reason)
1004                kfree_skb_reason(skb, reason);
1005        else
1006                consume_skb(skb);
1007
1008        return 0;
1009
1010csum_error:
1011        reason = SKB_DROP_REASON_ICMP_CSUM;
1012        __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
1013discard_it:
1014        __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
1015drop_no_count:
1016        kfree_skb_reason(skb, reason);
1017        return 0;
1018}
1019
1020void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
1021                      u8 type,
1022                      const struct in6_addr *saddr,
1023                      const struct in6_addr *daddr,
1024                      int oif)
1025{
1026        memset(fl6, 0, sizeof(*fl6));
1027        fl6->saddr = *saddr;
1028        fl6->daddr = *daddr;
1029        fl6->flowi6_proto       = IPPROTO_ICMPV6;
1030        fl6->fl6_icmp_type      = type;
1031        fl6->fl6_icmp_code      = 0;
1032        fl6->flowi6_oif         = oif;
1033        security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1034}
1035
1036int __init icmpv6_init(void)
1037{
1038        struct sock *sk;
1039        int err, i;
1040
1041        for_each_possible_cpu(i) {
1042                err = inet_ctl_sock_create(&sk, PF_INET6,
1043                                           SOCK_RAW, IPPROTO_ICMPV6, &init_net);
1044                if (err < 0) {
1045                        pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1046                               err);
1047                        return err;
1048                }
1049
1050                per_cpu(ipv6_icmp_sk, i) = sk;
1051
1052                /* Enough space for 2 64K ICMP packets, including
1053                 * sk_buff struct overhead.
1054                 */
1055                sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1056        }
1057
1058        err = -EAGAIN;
1059        if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1060                goto fail;
1061
1062        err = inet6_register_icmp_sender(icmp6_send);
1063        if (err)
1064                goto sender_reg_err;
1065        return 0;
1066
1067sender_reg_err:
1068        inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1069fail:
1070        pr_err("Failed to register ICMP6 protocol\n");
1071        return err;
1072}
1073
1074void icmpv6_cleanup(void)
1075{
1076        inet6_unregister_icmp_sender(icmp6_send);
1077        inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1078}
1079
1080
/*
 * errno translation for ICMPv6 Destination Unreachable, indexed by the
 * ICMPv6 code.  @err is the (positive) errno value, @fatal the flag that
 * icmpv6_err_convert() returns for that code.
 */
static const struct icmp6_err {
        int err;
        int fatal;
} tab_unreach[] = {
        { .err = ENETUNREACH,  .fatal = 0 },    /* NOROUTE */
        { .err = EACCES,       .fatal = 1 },    /* ADM_PROHIBITED */
        { .err = EHOSTUNREACH, .fatal = 0 },    /* Was NOT_NEIGHBOUR, now reserved */
        { .err = EHOSTUNREACH, .fatal = 0 },    /* ADDR_UNREACH */
        { .err = ECONNREFUSED, .fatal = 1 },    /* PORT_UNREACH */
        { .err = EACCES,       .fatal = 1 },    /* POLICY_FAIL */
        { .err = EACCES,       .fatal = 1 },    /* REJECT_ROUTE */
};
1114
1115int icmpv6_err_convert(u8 type, u8 code, int *err)
1116{
1117        int fatal = 0;
1118
1119        *err = EPROTO;
1120
1121        switch (type) {
1122        case ICMPV6_DEST_UNREACH:
1123                fatal = 1;
1124                if (code < ARRAY_SIZE(tab_unreach)) {
1125                        *err  = tab_unreach[code].err;
1126                        fatal = tab_unreach[code].fatal;
1127                }
1128                break;
1129
1130        case ICMPV6_PKT_TOOBIG:
1131                *err = EMSGSIZE;
1132                break;
1133
1134        case ICMPV6_PARAMPROB:
1135                *err = EPROTO;
1136                fatal = 1;
1137                break;
1138
1139        case ICMPV6_TIME_EXCEED:
1140                *err = EHOSTUNREACH;
1141                break;
1142        }
1143
1144        return fatal;
1145}
1146EXPORT_SYMBOL(icmpv6_err_convert);
1147
1148#ifdef CONFIG_SYSCTL
1149static struct ctl_table ipv6_icmp_table_template[] = {
1150        {
1151                .procname       = "ratelimit",
1152                .data           = &init_net.ipv6.sysctl.icmpv6_time,
1153                .maxlen         = sizeof(int),
1154                .mode           = 0644,
1155                .proc_handler   = proc_dointvec_ms_jiffies,
1156        },
1157        {
1158                .procname       = "echo_ignore_all",
1159                .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1160                .maxlen         = sizeof(u8),
1161                .mode           = 0644,
1162                .proc_handler = proc_dou8vec_minmax,
1163        },
1164        {
1165                .procname       = "echo_ignore_multicast",
1166                .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1167                .maxlen         = sizeof(u8),
1168                .mode           = 0644,
1169                .proc_handler = proc_dou8vec_minmax,
1170        },
1171        {
1172                .procname       = "echo_ignore_anycast",
1173                .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1174                .maxlen         = sizeof(u8),
1175                .mode           = 0644,
1176                .proc_handler = proc_dou8vec_minmax,
1177        },
1178        {
1179                .procname       = "ratemask",
1180                .data           = &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1181                .maxlen         = ICMPV6_MSG_MAX + 1,
1182                .mode           = 0644,
1183                .proc_handler = proc_do_large_bitmap,
1184        },
1185        { },
1186};
1187
1188struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1189{
1190        struct ctl_table *table;
1191
1192        table = kmemdup(ipv6_icmp_table_template,
1193                        sizeof(ipv6_icmp_table_template),
1194                        GFP_KERNEL);
1195
1196        if (table) {
1197                table[0].data = &net->ipv6.sysctl.icmpv6_time;
1198                table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1199                table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1200                table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1201                table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1202        }
1203        return table;
1204}
1205#endif
1206