linux/net/ipv6/icmp.c
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *      Internet Control Message Protocol (ICMPv6)
   4 *      Linux INET6 implementation
   5 *
   6 *      Authors:
   7 *      Pedro Roque             <roque@di.fc.ul.pt>
   8 *
   9 *      Based on net/ipv4/icmp.c
  10 *
  11 *      RFC 1885
  12 */
  13
  14/*
  15 *      Changes:
  16 *
  17 *      Andi Kleen              :       exception handling
   18 *      Andi Kleen                      add rate limits. never reply to an icmp.
   19 *                                      add more length checks and other fixes.
   20 *      yoshfuji                :       ensure to send parameter problem for
   21 *                                      fragments.
  22 *      YOSHIFUJI Hideaki @USAGI:       added sysctl for icmp rate limit.
  23 *      Randy Dunlap and
  24 *      YOSHIFUJI Hideaki @USAGI:       Per-interface statistics support
  25 *      Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
  26 */
  27
  28#define pr_fmt(fmt) "IPv6: " fmt
  29
  30#include <linux/module.h>
  31#include <linux/errno.h>
  32#include <linux/types.h>
  33#include <linux/socket.h>
  34#include <linux/in.h>
  35#include <linux/kernel.h>
  36#include <linux/sockios.h>
  37#include <linux/net.h>
  38#include <linux/skbuff.h>
  39#include <linux/init.h>
  40#include <linux/netfilter.h>
  41#include <linux/slab.h>
  42
  43#ifdef CONFIG_SYSCTL
  44#include <linux/sysctl.h>
  45#endif
  46
  47#include <linux/inet.h>
  48#include <linux/netdevice.h>
  49#include <linux/icmpv6.h>
  50
  51#include <net/ip.h>
  52#include <net/sock.h>
  53
  54#include <net/ipv6.h>
  55#include <net/ip6_checksum.h>
  56#include <net/ping.h>
  57#include <net/protocol.h>
  58#include <net/raw.h>
  59#include <net/rawv6.h>
  60#include <net/transp_v6.h>
  61#include <net/ip6_route.h>
  62#include <net/addrconf.h>
  63#include <net/icmp.h>
  64#include <net/xfrm.h>
  65#include <net/inet_common.h>
  66#include <net/dsfield.h>
  67#include <net/l3mdev.h>
  68
  69#include <linux/uaccess.h>
  70
   71/*
   72 *      The ICMP socket(s). This is the most convenient way to flow-control
   73 *      our ICMP output as well as maintain a clean interface throughout
   74 *      all layers. All socketless IP sends will soon be gone.
   75 *
   76 *      On SMP we have one ICMP socket per CPU.
   77 */
  78static struct sock *icmpv6_sk(struct net *net)
  79{
  80        return this_cpu_read(*net->ipv6.icmp_sk);
  81}
  82
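     /* err_handler for IPPROTO_ICMPV6 itself: invoked when an ICMPv6 error
      * quotes an ICMPv6 packet we sent.  ICMPV6_PKT_TOOBIG updates the path
      * MTU, NDISC_REDIRECT updates the route, and errors that quote one of
      * our echo requests are passed to the ping socket layer via ping_err().
      */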
  83static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
  84                       u8 type, u8 code, int offset, __be32 info)
  85{
  86        /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
  87        struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
  88        struct net *net = dev_net(skb->dev);
  89
  90        if (type == ICMPV6_PKT_TOOBIG)
  91                ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
  92        else if (type == NDISC_REDIRECT)
  93                ip6_redirect(skb, net, skb->dev->ifindex, 0,
  94                             sock_net_uid(net, NULL));
  95
  96        if (!(type & ICMPV6_INFOMSG_MASK))
  97                if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
  98                        ping_err(skb, offset, ntohl(info));
  99
 100        return 0;
 101}
 102
 103static int icmpv6_rcv(struct sk_buff *skb);
 104
 105static const struct inet6_protocol icmpv6_protocol = {
 106        .handler        =       icmpv6_rcv,
 107        .err_handler    =       icmpv6_err,
 108        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 109};
 110
 111/* Called with BH disabled */
 112static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
 113{
 114        struct sock *sk;
 115
 116        sk = icmpv6_sk(net);
 117        if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
  118                /* This can happen if the output path (e.g. a SIT or
 119                 * ip6ip6 tunnel) signals dst_link_failure() for an
 120                 * outgoing ICMP6 packet.
 121                 */
 122                return NULL;
 123        }
 124        return sk;
 125}
 126
 127static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
 128{
 129        spin_unlock(&sk->sk_lock.slock);
 130}
 131
  132/*
  133 * Figure out whether we may reply to this packet with an icmp error.
  134 *
  135 * We do not reply if:
  136 *      - it was an icmp error message.
  137 *      - it is truncated (i.e. cut off in the middle of some exthdr),
  138 *        so we cannot tell whether the inner protocol is ICMPV6.
  139 *
  140 *      --ANK (980726)
  141 */
 142
 143static bool is_ineligible(const struct sk_buff *skb)
 144{
 145        int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
 146        int len = skb->len - ptr;
 147        __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 148        __be16 frag_off;
 149
 150        if (len < 0)
 151                return true;
 152
 153        ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
 154        if (ptr < 0)
 155                return false;
 156        if (nexthdr == IPPROTO_ICMPV6) {
 157                u8 _type, *tp;
 158                tp = skb_header_pointer(skb,
 159                        ptr+offsetof(struct icmp6hdr, icmp6_type),
 160                        sizeof(_type), &_type);
 161                if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
 162                        return true;
 163        }
 164        return false;
 165}
 166
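     /* True when @type is exempt from rate limiting: either it lies outside
      * the range covered by the ratemask, or its bit is clear in the
      * net.ipv6.icmp.ratemask sysctl bitmap.
      */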
 167static bool icmpv6_mask_allow(struct net *net, int type)
 168{
 169        if (type > ICMPV6_MSG_MAX)
 170                return true;
 171
 172        /* Limit if icmp type is set in ratemask. */
 173        if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
 174                return true;
 175
 176        return false;
 177}
 178
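     /* Host-wide check: exempt types always pass, everything else must win
      * a token from the global ICMP rate limiter shared with ICMPv4
      * (icmp_global_allow(), sysctl_icmp_msgs_per_sec).
      */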
 179static bool icmpv6_global_allow(struct net *net, int type)
 180{
 181        if (icmpv6_mask_allow(net, type))
 182                return true;
 183
 184        if (icmp_global_allow())
 185                return true;
 186
 187        return false;
 188}
 189
 190/*
 191 * Check the ICMP output rate limit
 192 */
 193static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
 194                               struct flowi6 *fl6)
 195{
 196        struct net *net = sock_net(sk);
 197        struct dst_entry *dst;
 198        bool res = false;
 199
 200        if (icmpv6_mask_allow(net, type))
 201                return true;
 202
 203        /*
 204         * Look up the output route.
  205         * XXX: perhaps the expiry for routing entries cloned by
 206         * this lookup should be more aggressive (not longer than timeout).
 207         */
 208        dst = ip6_route_output(net, sk, fl6);
 209        if (dst->error) {
 210                IP6_INC_STATS(net, ip6_dst_idev(dst),
 211                              IPSTATS_MIB_OUTNOROUTES);
 212        } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
 213                res = true;
 214        } else {
 215                struct rt6_info *rt = (struct rt6_info *)dst;
 216                int tmo = net->ipv6.sysctl.icmpv6_time;
 217                struct inet_peer *peer;
 218
 219                /* Give more bandwidth to wider prefixes. */
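                     /* e.g. a /0 route divides the timeout by 16, a /64 by 4
                      * and a /96 by 2; prefixes longer than /96 keep the full
                      * timeout.
                      */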
 220                if (rt->rt6i_dst.plen < 128)
 221                        tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 222
 223                peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
 224                res = inet_peer_xrlim_allow(peer, tmo);
 225                if (peer)
 226                        inet_putpeer(peer);
 227        }
 228        dst_release(dst);
 229        return res;
 230}
 231
  232/*
  233 *      An inline helper for the "simple" if statement below:
  234 *      checks whether a parameter problem report is caused by an
  235 *      unrecognized IPv6 option whose Option Type has its two
  236 *      highest-order bits set to 10.
  237 */
 238
 239static bool opt_unrec(struct sk_buff *skb, __u32 offset)
 240{
 241        u8 _optval, *op;
 242
 243        offset += skb_network_offset(skb);
 244        op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
 245        if (!op)
 246                return true;
 247        return (*op & 0xC0) == 0x80;
 248}
 249
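     /* Fill in the ICMPv6 header of the queued message, compute the checksum
      * over the pseudo-header and every fragment on the socket's write queue,
      * then hand the pending frames to ip6_push_pending_frames().
      */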
 250void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
 251                                struct icmp6hdr *thdr, int len)
 252{
 253        struct sk_buff *skb;
 254        struct icmp6hdr *icmp6h;
 255
 256        skb = skb_peek(&sk->sk_write_queue);
 257        if (!skb)
 258                return;
 259
 260        icmp6h = icmp6_hdr(skb);
 261        memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
 262        icmp6h->icmp6_cksum = 0;
 263
 264        if (skb_queue_len(&sk->sk_write_queue) == 1) {
 265                skb->csum = csum_partial(icmp6h,
 266                                        sizeof(struct icmp6hdr), skb->csum);
 267                icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
 268                                                      &fl6->daddr,
 269                                                      len, fl6->flowi6_proto,
 270                                                      skb->csum);
 271        } else {
 272                __wsum tmp_csum = 0;
 273
 274                skb_queue_walk(&sk->sk_write_queue, skb) {
 275                        tmp_csum = csum_add(tmp_csum, skb->csum);
 276                }
 277
 278                tmp_csum = csum_partial(icmp6h,
 279                                        sizeof(struct icmp6hdr), tmp_csum);
 280                icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
 281                                                      &fl6->daddr,
 282                                                      len, fl6->flowi6_proto,
 283                                                      tmp_csum);
 284        }
 285        ip6_push_pending_frames(sk);
 286}
 287
 288struct icmpv6_msg {
 289        struct sk_buff  *skb;
 290        int             offset;
 291        uint8_t         type;
 292};
 293
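     /* getfrag callback for ip6_append_data(): copies @len bytes of the
      * offending packet into the message being built while folding them into
      * the checksum, and attaches the original skb's conntrack entry for
      * error messages.
      */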
 294static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
 295{
 296        struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
 297        struct sk_buff *org_skb = msg->skb;
 298        __wsum csum = 0;
 299
 300        csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
 301                                      to, len, csum);
 302        skb->csum = csum_block_add(skb->csum, csum, odd);
 303        if (!(msg->type & ICMPV6_INFOMSG_MASK))
 304                nf_ct_attach(skb, org_skb);
 305        return 0;
 306}
 307
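     /* Mobile IPv6: if the offending packet carried a Home Address
      * destination option, swap the IPv6 source address with the home
      * address from that option, so the error built below is addressed to
      * the mobile node's home address rather than its care-of address.
      */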
 308#if IS_ENABLED(CONFIG_IPV6_MIP6)
 309static void mip6_addr_swap(struct sk_buff *skb)
 310{
 311        struct ipv6hdr *iph = ipv6_hdr(skb);
 312        struct inet6_skb_parm *opt = IP6CB(skb);
 313        struct ipv6_destopt_hao *hao;
 314        struct in6_addr tmp;
 315        int off;
 316
 317        if (opt->dsthao) {
 318                off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
 319                if (likely(off >= 0)) {
 320                        hao = (struct ipv6_destopt_hao *)
 321                                        (skb_network_header(skb) + off);
 322                        tmp = iph->saddr;
 323                        iph->saddr = hao->addr;
 324                        hao->addr = tmp;
 325                }
 326        }
 327}
 328#else
 329static inline void mip6_addr_swap(struct sk_buff *skb) {}
 330#endif
 331
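     /* Route lookup for the error to be sent: fail for known anycast
      * destinations, run the result through xfrm_lookup(), and if necessary
      * retry with the reverse-decoded flow of the offending packet
      * (XFRM_LOOKUP_ICMP) to cope with IPsec policies.
      */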
 332static struct dst_entry *icmpv6_route_lookup(struct net *net,
 333                                             struct sk_buff *skb,
 334                                             struct sock *sk,
 335                                             struct flowi6 *fl6)
 336{
 337        struct dst_entry *dst, *dst2;
 338        struct flowi6 fl2;
 339        int err;
 340
 341        err = ip6_dst_lookup(net, sk, &dst, fl6);
 342        if (err)
 343                return ERR_PTR(err);
 344
 345        /*
  346         * We won't send an icmp error if the destination is a
  347         * known anycast address.
 348         */
 349        if (ipv6_anycast_destination(dst, &fl6->daddr)) {
 350                net_dbg_ratelimited("icmp6_send: acast source\n");
 351                dst_release(dst);
 352                return ERR_PTR(-EINVAL);
 353        }
 354
 355        /* No need to clone since we're just using its address. */
 356        dst2 = dst;
 357
 358        dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
 359        if (!IS_ERR(dst)) {
 360                if (dst != dst2)
 361                        return dst;
 362        } else {
 363                if (PTR_ERR(dst) == -EPERM)
 364                        dst = NULL;
 365                else
 366                        return dst;
 367        }
 368
 369        err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
 370        if (err)
 371                goto relookup_failed;
 372
 373        err = ip6_dst_lookup(net, sk, &dst2, &fl2);
 374        if (err)
 375                goto relookup_failed;
 376
 377        dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
 378        if (!IS_ERR(dst2)) {
 379                dst_release(dst);
 380                dst = dst2;
 381        } else {
 382                err = PTR_ERR(dst2);
 383                if (err == -EPERM) {
 384                        dst_release(dst);
 385                        return dst2;
 386                } else
 387                        goto relookup_failed;
 388        }
 389
 390relookup_failed:
 391        if (dst)
 392                return dst;
 393        return ERR_PTR(err);
 394}
 395
 396static struct net_device *icmp6_dev(const struct sk_buff *skb)
 397{
 398        struct net_device *dev = skb->dev;
 399
  400        /* For local traffic to a local address, the skb dev is the loopback
  401         * device. Check if there is a dst attached to the skb and, if so,
  402         * get the real device index. The same is needed for replies to a
  403         * link-local address on a device enslaved to an L3 master device.
  404         */
 405        if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
 406                const struct rt6_info *rt6 = skb_rt6_info(skb);
 407
 408                if (rt6)
 409                        dev = rt6->rt6i_idev->dev;
 410        }
 411
 412        return dev;
 413}
 414
 415static int icmp6_iif(const struct sk_buff *skb)
 416{
 417        return icmp6_dev(skb)->ifindex;
 418}
 419
 420/*
 421 *      Send an ICMP message in response to a packet in error
 422 */
 423static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 424                       const struct in6_addr *force_saddr)
 425{
 426        struct inet6_dev *idev = NULL;
 427        struct ipv6hdr *hdr = ipv6_hdr(skb);
 428        struct sock *sk;
 429        struct net *net;
 430        struct ipv6_pinfo *np;
 431        const struct in6_addr *saddr = NULL;
 432        struct dst_entry *dst;
 433        struct icmp6hdr tmp_hdr;
 434        struct flowi6 fl6;
 435        struct icmpv6_msg msg;
 436        struct ipcm6_cookie ipc6;
 437        int iif = 0;
 438        int addr_type = 0;
 439        int len;
 440        u32 mark;
 441
 442        if ((u8 *)hdr < skb->head ||
 443            (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
 444                return;
 445
 446        if (!skb->dev)
 447                return;
 448        net = dev_net(skb->dev);
 449        mark = IP6_REPLY_MARK(net, skb->mark);
 450        /*
 451         *      Make sure we respect the rules
 452         *      i.e. RFC 1885 2.4(e)
 453         *      Rule (e.1) is enforced by not using icmp6_send
 454         *      in any code that processes icmp errors.
 455         */
 456        addr_type = ipv6_addr_type(&hdr->daddr);
 457
 458        if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
 459            ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
 460                saddr = &hdr->daddr;
 461
 462        /*
 463         *      Dest addr check
 464         */
 465
 466        if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
 467                if (type != ICMPV6_PKT_TOOBIG &&
 468                    !(type == ICMPV6_PARAMPROB &&
 469                      code == ICMPV6_UNK_OPTION &&
 470                      (opt_unrec(skb, info))))
 471                        return;
 472
 473                saddr = NULL;
 474        }
 475
 476        addr_type = ipv6_addr_type(&hdr->saddr);
 477
 478        /*
 479         *      Source addr check
 480         */
 481
 482        if (__ipv6_addr_needs_scope_id(addr_type)) {
 483                iif = icmp6_iif(skb);
 484        } else {
 485                dst = skb_dst(skb);
 486                iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
 487        }
 488
 489        /*
 490         *      Must not send error if the source does not uniquely
 491         *      identify a single node (RFC2463 Section 2.4).
 492         *      We check unspecified / multicast addresses here,
 493         *      and anycast addresses will be checked later.
 494         */
 495        if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
 496                net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
 497                                    &hdr->saddr, &hdr->daddr);
 498                return;
 499        }
 500
 501        /*
  502         *      Never answer to an ICMP error packet.
 503         */
 504        if (is_ineligible(skb)) {
 505                net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
 506                                    &hdr->saddr, &hdr->daddr);
 507                return;
 508        }
 509
 510        /* Needed by both icmp_global_allow and icmpv6_xmit_lock */
 511        local_bh_disable();
 512
 513        /* Check global sysctl_icmp_msgs_per_sec ratelimit */
 514        if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
 515                goto out_bh_enable;
 516
 517        mip6_addr_swap(skb);
 518
 519        memset(&fl6, 0, sizeof(fl6));
 520        fl6.flowi6_proto = IPPROTO_ICMPV6;
 521        fl6.daddr = hdr->saddr;
 522        if (force_saddr)
 523                saddr = force_saddr;
 524        if (saddr)
 525                fl6.saddr = *saddr;
 526        fl6.flowi6_mark = mark;
 527        fl6.flowi6_oif = iif;
 528        fl6.fl6_icmp_type = type;
 529        fl6.fl6_icmp_code = code;
 530        fl6.flowi6_uid = sock_net_uid(net, NULL);
 531        fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
 532        security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 533
 534        sk = icmpv6_xmit_lock(net);
 535        if (!sk)
 536                goto out_bh_enable;
 537
 538        sk->sk_mark = mark;
 539        np = inet6_sk(sk);
 540
 541        if (!icmpv6_xrlim_allow(sk, type, &fl6))
 542                goto out;
 543
 544        tmp_hdr.icmp6_type = type;
 545        tmp_hdr.icmp6_code = code;
 546        tmp_hdr.icmp6_cksum = 0;
 547        tmp_hdr.icmp6_pointer = htonl(info);
 548
 549        if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
 550                fl6.flowi6_oif = np->mcast_oif;
 551        else if (!fl6.flowi6_oif)
 552                fl6.flowi6_oif = np->ucast_oif;
 553
 554        ipcm6_init_sk(&ipc6, np);
 555        fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 556
 557        dst = icmpv6_route_lookup(net, skb, sk, &fl6);
 558        if (IS_ERR(dst))
 559                goto out;
 560
 561        ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 562
 563        msg.skb = skb;
 564        msg.offset = skb_network_offset(skb);
 565        msg.type = type;
 566
 567        len = skb->len - msg.offset;
 568        len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
 569        if (len < 0) {
 570                net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
 571                                    &hdr->saddr, &hdr->daddr);
 572                goto out_dst_release;
 573        }
 574
 575        rcu_read_lock();
 576        idev = __in6_dev_get(skb->dev);
 577
 578        if (ip6_append_data(sk, icmpv6_getfrag, &msg,
 579                            len + sizeof(struct icmp6hdr),
 580                            sizeof(struct icmp6hdr),
 581                            &ipc6, &fl6, (struct rt6_info *)dst,
 582                            MSG_DONTWAIT)) {
 583                ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 584                ip6_flush_pending_frames(sk);
 585        } else {
 586                icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
 587                                           len + sizeof(struct icmp6hdr));
 588        }
 589        rcu_read_unlock();
 590out_dst_release:
 591        dst_release(dst);
 592out:
 593        icmpv6_xmit_unlock(sk);
 594out_bh_enable:
 595        local_bh_enable();
 596}
 597
 598/* Slightly more convenient version of icmp6_send.
 599 */
 600void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
 601{
 602        icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
 603        kfree_skb(skb);
 604}
 605
  606/* Generate an icmpv6 error with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
  607 * (or ICMPV6_TIME_EXCEED/ICMPV6_EXC_HOPLIMIT for ICMP_TIME_EXCEEDED) if
  608 * sufficient data bytes are available.
  609 * @nhs is the size of the tunnel header(s):
  610 *  either an IPv4 header for SIT encap,
  611 *  or an IPv4 header + GRE header for GRE encap.
  612 */
 612int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
 613                               unsigned int data_len)
 614{
 615        struct in6_addr temp_saddr;
 616        struct rt6_info *rt;
 617        struct sk_buff *skb2;
 618        u32 info = 0;
 619
 620        if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
 621                return 1;
 622
 623        /* RFC 4884 (partial) support for ICMP extensions */
 624        if (data_len < 128 || (data_len & 7) || skb->len < data_len)
 625                data_len = 0;
 626
 627        skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
 628
 629        if (!skb2)
 630                return 1;
 631
 632        skb_dst_drop(skb2);
 633        skb_pull(skb2, nhs);
 634        skb_reset_network_header(skb2);
 635
 636        rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
 637                        skb, 0);
 638
 639        if (rt && rt->dst.dev)
 640                skb2->dev = rt->dst.dev;
 641
 642        ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
 643
 644        if (data_len) {
 645                /* RFC 4884 (partial) support :
 646                 * insert 0 padding at the end, before the extensions
 647                 */
 648                __skb_push(skb2, nhs);
 649                skb_reset_network_header(skb2);
 650                memmove(skb2->data, skb2->data + nhs, data_len - nhs);
 651                memset(skb2->data + data_len - nhs, 0, nhs);
 652                /* RFC 4884 4.5 : Length is measured in 64-bit words,
 653                 * and stored in reserved[0]
 654                 */
 655                info = (data_len/8) << 24;
 656        }
 657        if (type == ICMP_TIME_EXCEEDED)
 658                icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
 659                           info, &temp_saddr);
 660        else
 661                icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
 662                           info, &temp_saddr);
 663        if (rt)
 664                ip6_rt_put(rt);
 665
 666        kfree_skb(skb2);
 667
 668        return 0;
 669}
 670EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
 671
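     /* Build and send an ICMPV6_ECHO_REPLY for a received echo request:
      * honour the echo_ignore_multicast/echo_ignore_anycast sysctls, reflect
      * the incoming flow label when flowlabel_reflect asks for it, and apply
      * the same global and per-destination rate limits as error generation.
      */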
 672static void icmpv6_echo_reply(struct sk_buff *skb)
 673{
 674        struct net *net = dev_net(skb->dev);
 675        struct sock *sk;
 676        struct inet6_dev *idev;
 677        struct ipv6_pinfo *np;
 678        const struct in6_addr *saddr = NULL;
 679        struct icmp6hdr *icmph = icmp6_hdr(skb);
 680        struct icmp6hdr tmp_hdr;
 681        struct flowi6 fl6;
 682        struct icmpv6_msg msg;
 683        struct dst_entry *dst;
 684        struct ipcm6_cookie ipc6;
 685        u32 mark = IP6_REPLY_MARK(net, skb->mark);
 686        bool acast;
 687
 688        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
 689            net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
 690                return;
 691
 692        saddr = &ipv6_hdr(skb)->daddr;
 693
 694        acast = ipv6_anycast_destination(skb_dst(skb), saddr);
 695        if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
 696                return;
 697
 698        if (!ipv6_unicast_destination(skb) &&
 699            !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
 700                saddr = NULL;
 701
 702        memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
 703        tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
 704
 705        memset(&fl6, 0, sizeof(fl6));
 706        if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
 707                fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
 708
 709        fl6.flowi6_proto = IPPROTO_ICMPV6;
 710        fl6.daddr = ipv6_hdr(skb)->saddr;
 711        if (saddr)
 712                fl6.saddr = *saddr;
 713        fl6.flowi6_oif = icmp6_iif(skb);
 714        fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
 715        fl6.flowi6_mark = mark;
 716        fl6.flowi6_uid = sock_net_uid(net, NULL);
 717        security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 718
 719        local_bh_disable();
 720        sk = icmpv6_xmit_lock(net);
 721        if (!sk)
 722                goto out_bh_enable;
 723        sk->sk_mark = mark;
 724        np = inet6_sk(sk);
 725
 726        if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
 727                fl6.flowi6_oif = np->mcast_oif;
 728        else if (!fl6.flowi6_oif)
 729                fl6.flowi6_oif = np->ucast_oif;
 730
 731        if (ip6_dst_lookup(net, sk, &dst, &fl6))
 732                goto out;
 733        dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
 734        if (IS_ERR(dst))
 735                goto out;
 736
 737        /* Check the ratelimit */
 738        if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
 739            !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
 740                goto out_dst_release;
 741
 742        idev = __in6_dev_get(skb->dev);
 743
 744        msg.skb = skb;
 745        msg.offset = 0;
 746        msg.type = ICMPV6_ECHO_REPLY;
 747
 748        ipcm6_init_sk(&ipc6, np);
 749        ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 750        ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 751
 752        if (ip6_append_data(sk, icmpv6_getfrag, &msg,
 753                            skb->len + sizeof(struct icmp6hdr),
 754                            sizeof(struct icmp6hdr), &ipc6, &fl6,
 755                            (struct rt6_info *)dst, MSG_DONTWAIT)) {
 756                __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 757                ip6_flush_pending_frames(sk);
 758        } else {
 759                icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
 760                                           skb->len + sizeof(struct icmp6hdr));
 761        }
 762out_dst_release:
 763        dst_release(dst);
 764out:
 765        icmpv6_xmit_unlock(sk);
 766out_bh_enable:
 767        local_bh_enable();
 768}
 769
 770void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 771{
 772        const struct inet6_protocol *ipprot;
 773        int inner_offset;
 774        __be16 frag_off;
 775        u8 nexthdr;
 776        struct net *net = dev_net(skb->dev);
 777
 778        if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 779                goto out;
 780
 781        nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
 782        if (ipv6_ext_hdr(nexthdr)) {
 783                /* now skip over extension headers */
 784                inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
 785                                                &nexthdr, &frag_off);
 786                if (inner_offset < 0)
 787                        goto out;
 788        } else {
 789                inner_offset = sizeof(struct ipv6hdr);
 790        }
 791
  792        /* Check the header, including 8 bytes of the inner protocol header. */
 793        if (!pskb_may_pull(skb, inner_offset+8))
 794                goto out;
 795
 796        /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
  797           Without this we will not be able, e.g., to do source-routed
  798           pmtu discovery.
  799           The corresponding argument (opt) to the notifiers is already added.
 800           --ANK (980726)
 801         */
 802
 803        ipprot = rcu_dereference(inet6_protos[nexthdr]);
 804        if (ipprot && ipprot->err_handler)
 805                ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
 806
 807        raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
 808        return;
 809
 810out:
 811        __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
 812}
 813
 814/*
 815 *      Handle icmp messages
 816 */
 817
 818static int icmpv6_rcv(struct sk_buff *skb)
 819{
 820        struct net *net = dev_net(skb->dev);
 821        struct net_device *dev = icmp6_dev(skb);
 822        struct inet6_dev *idev = __in6_dev_get(dev);
 823        const struct in6_addr *saddr, *daddr;
 824        struct icmp6hdr *hdr;
 825        u8 type;
 826        bool success = false;
 827
 828        if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 829                struct sec_path *sp = skb_sec_path(skb);
 830                int nh;
 831
 832                if (!(sp && sp->xvec[sp->len - 1]->props.flags &
 833                                 XFRM_STATE_ICMP))
 834                        goto drop_no_count;
 835
 836                if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
 837                        goto drop_no_count;
 838
 839                nh = skb_network_offset(skb);
 840                skb_set_network_header(skb, sizeof(*hdr));
 841
 842                if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
 843                        goto drop_no_count;
 844
 845                skb_set_network_header(skb, nh);
 846        }
 847
 848        __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
 849
 850        saddr = &ipv6_hdr(skb)->saddr;
 851        daddr = &ipv6_hdr(skb)->daddr;
 852
 853        if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
 854                net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
 855                                    saddr, daddr);
 856                goto csum_error;
 857        }
 858
 859        if (!pskb_pull(skb, sizeof(*hdr)))
 860                goto discard_it;
 861
 862        hdr = icmp6_hdr(skb);
 863
 864        type = hdr->icmp6_type;
 865
 866        ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
 867
 868        switch (type) {
 869        case ICMPV6_ECHO_REQUEST:
 870                if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
 871                        icmpv6_echo_reply(skb);
 872                break;
 873
 874        case ICMPV6_ECHO_REPLY:
 875                success = ping_rcv(skb);
 876                break;
 877
 878        case ICMPV6_PKT_TOOBIG:
  879                /* BUGGG_FUTURE: if the packet contains a rthdr, we cannot update
  880                   the standard destination cache. It seems only an "advanced"
  881                   destination cache would allow us to solve this problem.
 882                   --ANK (980726)
 883                 */
 884                if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 885                        goto discard_it;
 886                hdr = icmp6_hdr(skb);
 887
 888                /* to notify */
 889                /* fall through */
 890        case ICMPV6_DEST_UNREACH:
 891        case ICMPV6_TIME_EXCEED:
 892        case ICMPV6_PARAMPROB:
 893                icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
 894                break;
 895
 896        case NDISC_ROUTER_SOLICITATION:
 897        case NDISC_ROUTER_ADVERTISEMENT:
 898        case NDISC_NEIGHBOUR_SOLICITATION:
 899        case NDISC_NEIGHBOUR_ADVERTISEMENT:
 900        case NDISC_REDIRECT:
 901                ndisc_rcv(skb);
 902                break;
 903
 904        case ICMPV6_MGM_QUERY:
 905                igmp6_event_query(skb);
 906                break;
 907
 908        case ICMPV6_MGM_REPORT:
 909                igmp6_event_report(skb);
 910                break;
 911
 912        case ICMPV6_MGM_REDUCTION:
 913        case ICMPV6_NI_QUERY:
 914        case ICMPV6_NI_REPLY:
 915        case ICMPV6_MLD2_REPORT:
 916        case ICMPV6_DHAAD_REQUEST:
 917        case ICMPV6_DHAAD_REPLY:
 918        case ICMPV6_MOBILE_PREFIX_SOL:
 919        case ICMPV6_MOBILE_PREFIX_ADV:
 920                break;
 921
 922        default:
 923                /* informational */
 924                if (type & ICMPV6_INFOMSG_MASK)
 925                        break;
 926
 927                net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
 928                                    saddr, daddr);
 929
 930                /*
 931                 * error of unknown type.
 932                 * must pass to upper level
 933                 */
 934
 935                icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
 936        }
 937
  938        /* Until the v6 path can be better sorted out, assume failure and
  939         * preserve the status quo behaviour for the rest of the paths to here.
 940         */
 941        if (success)
 942                consume_skb(skb);
 943        else
 944                kfree_skb(skb);
 945
 946        return 0;
 947
 948csum_error:
 949        __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
 950discard_it:
 951        __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
 952drop_no_count:
 953        kfree_skb(skb);
 954        return 0;
 955}
 956
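     /* Initialise a flowi6 for an outgoing ICMPv6 message of the given type
      * between @saddr and @daddr on interface @oif, and let the LSM classify
      * the flow.
      */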
 957void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
 958                      u8 type,
 959                      const struct in6_addr *saddr,
 960                      const struct in6_addr *daddr,
 961                      int oif)
 962{
 963        memset(fl6, 0, sizeof(*fl6));
 964        fl6->saddr = *saddr;
 965        fl6->daddr = *daddr;
 966        fl6->flowi6_proto       = IPPROTO_ICMPV6;
 967        fl6->fl6_icmp_type      = type;
 968        fl6->fl6_icmp_code      = 0;
 969        fl6->flowi6_oif         = oif;
 970        security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
 971}
 972
 973static void __net_exit icmpv6_sk_exit(struct net *net)
 974{
 975        int i;
 976
 977        for_each_possible_cpu(i)
 978                inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
 979        free_percpu(net->ipv6.icmp_sk);
 980}
 981
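     /* Per-netns setup: create one raw ICMPv6 control socket for every
      * possible CPU; these are the sockets that icmpv6_sk() hands to the
      * transmit paths above.
      */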
 982static int __net_init icmpv6_sk_init(struct net *net)
 983{
 984        struct sock *sk;
 985        int err, i;
 986
 987        net->ipv6.icmp_sk = alloc_percpu(struct sock *);
 988        if (!net->ipv6.icmp_sk)
 989                return -ENOMEM;
 990
 991        for_each_possible_cpu(i) {
 992                err = inet_ctl_sock_create(&sk, PF_INET6,
 993                                           SOCK_RAW, IPPROTO_ICMPV6, net);
 994                if (err < 0) {
 995                        pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
 996                               err);
 997                        goto fail;
 998                }
 999
1000                *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
1001
1002                /* Enough space for 2 64K ICMP packets, including
1003                 * sk_buff struct overhead.
1004                 */
1005                sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1006        }
1007        return 0;
1008
1009 fail:
1010        icmpv6_sk_exit(net);
1011        return err;
1012}
1013
1014static struct pernet_operations icmpv6_sk_ops = {
1015        .init = icmpv6_sk_init,
1016        .exit = icmpv6_sk_exit,
1017};
1018
1019int __init icmpv6_init(void)
1020{
1021        int err;
1022
1023        err = register_pernet_subsys(&icmpv6_sk_ops);
1024        if (err < 0)
1025                return err;
1026
1027        err = -EAGAIN;
1028        if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1029                goto fail;
1030
1031        err = inet6_register_icmp_sender(icmp6_send);
1032        if (err)
1033                goto sender_reg_err;
1034        return 0;
1035
1036sender_reg_err:
1037        inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1038fail:
1039        pr_err("Failed to register ICMP6 protocol\n");
1040        unregister_pernet_subsys(&icmpv6_sk_ops);
1041        return err;
1042}
1043
1044void icmpv6_cleanup(void)
1045{
1046        inet6_unregister_icmp_sender(icmp6_send);
1047        unregister_pernet_subsys(&icmpv6_sk_ops);
1048        inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1049}
1050
1051
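     /* Map ICMPV6_DEST_UNREACH codes to errno values and note whether each
      * one is fatal for the connection; indexed by ICMPv6 code, consumed by
      * icmpv6_err_convert() below.
      */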
1052static const struct icmp6_err {
1053        int err;
1054        int fatal;
1055} tab_unreach[] = {
1056        {       /* NOROUTE */
1057                .err    = ENETUNREACH,
1058                .fatal  = 0,
1059        },
1060        {       /* ADM_PROHIBITED */
1061                .err    = EACCES,
1062                .fatal  = 1,
1063        },
1064        {       /* Was NOT_NEIGHBOUR, now reserved */
1065                .err    = EHOSTUNREACH,
1066                .fatal  = 0,
1067        },
1068        {       /* ADDR_UNREACH */
1069                .err    = EHOSTUNREACH,
1070                .fatal  = 0,
1071        },
1072        {       /* PORT_UNREACH */
1073                .err    = ECONNREFUSED,
1074                .fatal  = 1,
1075        },
1076        {       /* POLICY_FAIL */
1077                .err    = EACCES,
1078                .fatal  = 1,
1079        },
1080        {       /* REJECT_ROUTE */
1081                .err    = EACCES,
1082                .fatal  = 1,
1083        },
1084};
1085
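     /* Translate an ICMPv6 type/code into an errno stored in *err; the
      * return value tells the caller whether the error is fatal for the
      * socket or merely transient.
      */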
1086int icmpv6_err_convert(u8 type, u8 code, int *err)
1087{
1088        int fatal = 0;
1089
1090        *err = EPROTO;
1091
1092        switch (type) {
1093        case ICMPV6_DEST_UNREACH:
1094                fatal = 1;
1095                if (code < ARRAY_SIZE(tab_unreach)) {
1096                        *err  = tab_unreach[code].err;
1097                        fatal = tab_unreach[code].fatal;
1098                }
1099                break;
1100
1101        case ICMPV6_PKT_TOOBIG:
1102                *err = EMSGSIZE;
1103                break;
1104
1105        case ICMPV6_PARAMPROB:
1106                *err = EPROTO;
1107                fatal = 1;
1108                break;
1109
1110        case ICMPV6_TIME_EXCEED:
1111                *err = EHOSTUNREACH;
1112                break;
1113        }
1114
1115        return fatal;
1116}
1117EXPORT_SYMBOL(icmpv6_err_convert);
1118
1119#ifdef CONFIG_SYSCTL
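     /* Template for the per-namespace net.ipv6.icmp.* sysctls;
      * ipv6_icmp_sysctl_init() duplicates it and points each .data field at
      * the namespace's own copy of the value.
      */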
1120static struct ctl_table ipv6_icmp_table_template[] = {
1121        {
1122                .procname       = "ratelimit",
1123                .data           = &init_net.ipv6.sysctl.icmpv6_time,
1124                .maxlen         = sizeof(int),
1125                .mode           = 0644,
1126                .proc_handler   = proc_dointvec_ms_jiffies,
1127        },
1128        {
1129                .procname       = "echo_ignore_all",
1130                .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1131                .maxlen         = sizeof(int),
1132                .mode           = 0644,
1133                .proc_handler = proc_dointvec,
1134        },
1135        {
1136                .procname       = "echo_ignore_multicast",
1137                .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1138                .maxlen         = sizeof(int),
1139                .mode           = 0644,
1140                .proc_handler = proc_dointvec,
1141        },
1142        {
1143                .procname       = "echo_ignore_anycast",
1144                .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1145                .maxlen         = sizeof(int),
1146                .mode           = 0644,
1147                .proc_handler = proc_dointvec,
1148        },
1149        {
1150                .procname       = "ratemask",
1151                .data           = &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1152                .maxlen         = ICMPV6_MSG_MAX + 1,
1153                .mode           = 0644,
1154                .proc_handler = proc_do_large_bitmap,
1155        },
1156        { },
1157};
1158
1159struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1160{
1161        struct ctl_table *table;
1162
1163        table = kmemdup(ipv6_icmp_table_template,
1164                        sizeof(ipv6_icmp_table_template),
1165                        GFP_KERNEL);
1166
1167        if (table) {
1168                table[0].data = &net->ipv6.sysctl.icmpv6_time;
1169                table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1170                table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1171                table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1172                table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1173        }
1174        return table;
1175}
1176#endif
1177