linux/net/ipv6/icmp.c
<<
>>
Prefs
   1/*
   2 *      Internet Control Message Protocol (ICMPv6)
   3 *      Linux INET6 implementation
   4 *
   5 *      Authors:
   6 *      Pedro Roque             <roque@di.fc.ul.pt>
   7 *
   8 *      Based on net/ipv4/icmp.c
   9 *
  10 *      RFC 1885
  11 *
  12 *      This program is free software; you can redistribute it and/or
  13 *      modify it under the terms of the GNU General Public License
  14 *      as published by the Free Software Foundation; either version
  15 *      2 of the License, or (at your option) any later version.
  16 */
  17
  18/*
  19 *      Changes:
  20 *
  21 *      Andi Kleen              :       exception handling
  22 *      Andi Kleen                      add rate limits. never reply to a icmp.
  23 *                                      add more length checks and other fixes.
  24 *      yoshfuji                :       ensure to sent parameter problem for
  25 *                                      fragments.
  26 *      YOSHIFUJI Hideaki @USAGI:       added sysctl for icmp rate limit.
  27 *      Randy Dunlap and
  28 *      YOSHIFUJI Hideaki @USAGI:       Per-interface statistics support
  29 *      Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
  30 */
  31
  32#define pr_fmt(fmt) "IPv6: " fmt
  33
  34#include <linux/module.h>
  35#include <linux/errno.h>
  36#include <linux/types.h>
  37#include <linux/socket.h>
  38#include <linux/in.h>
  39#include <linux/kernel.h>
  40#include <linux/sockios.h>
  41#include <linux/net.h>
  42#include <linux/skbuff.h>
  43#include <linux/init.h>
  44#include <linux/netfilter.h>
  45#include <linux/slab.h>
  46
  47#ifdef CONFIG_SYSCTL
  48#include <linux/sysctl.h>
  49#endif
  50
  51#include <linux/inet.h>
  52#include <linux/netdevice.h>
  53#include <linux/icmpv6.h>
  54
  55#include <net/ip.h>
  56#include <net/sock.h>
  57
  58#include <net/ipv6.h>
  59#include <net/ip6_checksum.h>
  60#include <net/ping.h>
  61#include <net/protocol.h>
  62#include <net/raw.h>
  63#include <net/rawv6.h>
  64#include <net/transp_v6.h>
  65#include <net/ip6_route.h>
  66#include <net/addrconf.h>
  67#include <net/icmp.h>
  68#include <net/xfrm.h>
  69#include <net/inet_common.h>
  70#include <net/dsfield.h>
  71#include <net/l3mdev.h>
  72
  73#include <linux/uaccess.h>
  74
  75/*
  76 *      The ICMP socket(s). This is the most convenient way to flow control
  77 *      our ICMP output as well as maintain a clean interface throughout
  78 *      all layers. All Socketless IP sends will soon be gone.
  79 *
  80 *      On SMP we have one ICMP socket per-cpu.
  81 */
  82static inline struct sock *icmpv6_sk(struct net *net)
  83{
  84        return net->ipv6.icmp_sk[smp_processor_id()];
  85}
  86
  87static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
  88                       u8 type, u8 code, int offset, __be32 info)
  89{
  90        /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
  91        struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
  92        struct net *net = dev_net(skb->dev);
  93
  94        if (type == ICMPV6_PKT_TOOBIG)
  95                ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
  96        else if (type == NDISC_REDIRECT)
  97                ip6_redirect(skb, net, skb->dev->ifindex, 0,
  98                             sock_net_uid(net, NULL));
  99
 100        if (!(type & ICMPV6_INFOMSG_MASK))
 101                if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
 102                        ping_err(skb, offset, ntohl(info));
 103}
 104
 105static int icmpv6_rcv(struct sk_buff *skb);
 106
 107static const struct inet6_protocol icmpv6_protocol = {
 108        .handler        =       icmpv6_rcv,
 109        .err_handler    =       icmpv6_err,
 110        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 111};
 112
 113/* Called with BH disabled */
 114static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
 115{
 116        struct sock *sk;
 117
 118        sk = icmpv6_sk(net);
 119        if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
 120                /* This can happen if the output path (f.e. SIT or
 121                 * ip6ip6 tunnel) signals dst_link_failure() for an
 122                 * outgoing ICMP6 packet.
 123                 */
 124                return NULL;
 125        }
 126        return sk;
 127}
 128
 129static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
 130{
 131        spin_unlock(&sk->sk_lock.slock);
 132}
 133
 134/*
 135 * Figure out, may we reply to this packet with icmp error.
 136 *
 137 * We do not reply, if:
 138 *      - it was icmp error message.
 139 *      - it is truncated, so that it is known, that protocol is ICMPV6
 140 *        (i.e. in the middle of some exthdr)
 141 *
 142 *      --ANK (980726)
 143 */
 144
 145static bool is_ineligible(const struct sk_buff *skb)
 146{
 147        int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
 148        int len = skb->len - ptr;
 149        __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 150        __be16 frag_off;
 151
 152        if (len < 0)
 153                return true;
 154
 155        ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
 156        if (ptr < 0)
 157                return false;
 158        if (nexthdr == IPPROTO_ICMPV6) {
 159                u8 _type, *tp;
 160                tp = skb_header_pointer(skb,
 161                        ptr+offsetof(struct icmp6hdr, icmp6_type),
 162                        sizeof(_type), &_type);
 163                if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
 164                        return true;
 165        }
 166        return false;
 167}
 168
 169static bool icmpv6_mask_allow(int type)
 170{
 171        /* Informational messages are not limited. */
 172        if (type & ICMPV6_INFOMSG_MASK)
 173                return true;
 174
 175        /* Do not limit pmtu discovery, it would break it. */
 176        if (type == ICMPV6_PKT_TOOBIG)
 177                return true;
 178
 179        return false;
 180}
 181
 182static bool icmpv6_global_allow(int type)
 183{
 184        if (icmpv6_mask_allow(type))
 185                return true;
 186
 187        if (icmp_global_allow())
 188                return true;
 189
 190        return false;
 191}
 192
 193/*
 194 * Check the ICMP output rate limit
 195 */
 196static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
 197                               struct flowi6 *fl6)
 198{
 199        struct net *net = sock_net(sk);
 200        struct dst_entry *dst;
 201        bool res = false;
 202
 203        if (icmpv6_mask_allow(type))
 204                return true;
 205
 206        /*
 207         * Look up the output route.
 208         * XXX: perhaps the expire for routing entries cloned by
 209         * this lookup should be more aggressive (not longer than timeout).
 210         */
 211        dst = ip6_route_output(net, sk, fl6);
 212        if (dst->error) {
 213                IP6_INC_STATS(net, ip6_dst_idev(dst),
 214                              IPSTATS_MIB_OUTNOROUTES);
 215        } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
 216                res = true;
 217        } else {
 218                struct rt6_info *rt = (struct rt6_info *)dst;
 219                int tmo = net->ipv6.sysctl.icmpv6_time;
 220                struct inet_peer *peer;
 221
 222                /* Give more bandwidth to wider prefixes. */
 223                if (rt->rt6i_dst.plen < 128)
 224                        tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 225
 226                peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
 227                res = inet_peer_xrlim_allow(peer, tmo);
 228                if (peer)
 229                        inet_putpeer(peer);
 230        }
 231        dst_release(dst);
 232        return res;
 233}
 234
 235/*
 236 *      an inline helper for the "simple" if statement below
 237 *      checks if parameter problem report is caused by an
 238 *      unrecognized IPv6 option that has the Option Type
 239 *      highest-order two bits set to 10
 240 */
 241
 242static bool opt_unrec(struct sk_buff *skb, __u32 offset)
 243{
 244        u8 _optval, *op;
 245
 246        offset += skb_network_offset(skb);
 247        op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
 248        if (!op)
 249                return true;
 250        return (*op & 0xC0) == 0x80;
 251}
 252
 253void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
 254                                struct icmp6hdr *thdr, int len)
 255{
 256        struct sk_buff *skb;
 257        struct icmp6hdr *icmp6h;
 258
 259        skb = skb_peek(&sk->sk_write_queue);
 260        if (!skb)
 261                return;
 262
 263        icmp6h = icmp6_hdr(skb);
 264        memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
 265        icmp6h->icmp6_cksum = 0;
 266
 267        if (skb_queue_len(&sk->sk_write_queue) == 1) {
 268                skb->csum = csum_partial(icmp6h,
 269                                        sizeof(struct icmp6hdr), skb->csum);
 270                icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
 271                                                      &fl6->daddr,
 272                                                      len, fl6->flowi6_proto,
 273                                                      skb->csum);
 274        } else {
 275                __wsum tmp_csum = 0;
 276
 277                skb_queue_walk(&sk->sk_write_queue, skb) {
 278                        tmp_csum = csum_add(tmp_csum, skb->csum);
 279                }
 280
 281                tmp_csum = csum_partial(icmp6h,
 282                                        sizeof(struct icmp6hdr), tmp_csum);
 283                icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
 284                                                      &fl6->daddr,
 285                                                      len, fl6->flowi6_proto,
 286                                                      tmp_csum);
 287        }
 288        ip6_push_pending_frames(sk);
 289}
 290
 291struct icmpv6_msg {
 292        struct sk_buff  *skb;
 293        int             offset;
 294        uint8_t         type;
 295};
 296
 297static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
 298{
 299        struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
 300        struct sk_buff *org_skb = msg->skb;
 301        __wsum csum = 0;
 302
 303        csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
 304                                      to, len, csum);
 305        skb->csum = csum_block_add(skb->csum, csum, odd);
 306        if (!(msg->type & ICMPV6_INFOMSG_MASK))
 307                nf_ct_attach(skb, org_skb);
 308        return 0;
 309}
 310
 311#if IS_ENABLED(CONFIG_IPV6_MIP6)
 312static void mip6_addr_swap(struct sk_buff *skb)
 313{
 314        struct ipv6hdr *iph = ipv6_hdr(skb);
 315        struct inet6_skb_parm *opt = IP6CB(skb);
 316        struct ipv6_destopt_hao *hao;
 317        struct in6_addr tmp;
 318        int off;
 319
 320        if (opt->dsthao) {
 321                off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
 322                if (likely(off >= 0)) {
 323                        hao = (struct ipv6_destopt_hao *)
 324                                        (skb_network_header(skb) + off);
 325                        tmp = iph->saddr;
 326                        iph->saddr = hao->addr;
 327                        hao->addr = tmp;
 328                }
 329        }
 330}
 331#else
 332static inline void mip6_addr_swap(struct sk_buff *skb) {}
 333#endif
 334
 335static struct dst_entry *icmpv6_route_lookup(struct net *net,
 336                                             struct sk_buff *skb,
 337                                             struct sock *sk,
 338                                             struct flowi6 *fl6)
 339{
 340        struct dst_entry *dst, *dst2;
 341        struct flowi6 fl2;
 342        int err;
 343
 344        err = ip6_dst_lookup(net, sk, &dst, fl6);
 345        if (err)
 346                return ERR_PTR(err);
 347
 348        /*
 349         * We won't send icmp if the destination is known
 350         * anycast.
 351         */
 352        if (ipv6_anycast_destination(dst, &fl6->daddr)) {
 353                net_dbg_ratelimited("icmp6_send: acast source\n");
 354                dst_release(dst);
 355                return ERR_PTR(-EINVAL);
 356        }
 357
 358        /* No need to clone since we're just using its address. */
 359        dst2 = dst;
 360
 361        dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
 362        if (!IS_ERR(dst)) {
 363                if (dst != dst2)
 364                        return dst;
 365        } else {
 366                if (PTR_ERR(dst) == -EPERM)
 367                        dst = NULL;
 368                else
 369                        return dst;
 370        }
 371
 372        err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
 373        if (err)
 374                goto relookup_failed;
 375
 376        err = ip6_dst_lookup(net, sk, &dst2, &fl2);
 377        if (err)
 378                goto relookup_failed;
 379
 380        dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
 381        if (!IS_ERR(dst2)) {
 382                dst_release(dst);
 383                dst = dst2;
 384        } else {
 385                err = PTR_ERR(dst2);
 386                if (err == -EPERM) {
 387                        dst_release(dst);
 388                        return dst2;
 389                } else
 390                        goto relookup_failed;
 391        }
 392
 393relookup_failed:
 394        if (dst)
 395                return dst;
 396        return ERR_PTR(err);
 397}
 398
 399static int icmp6_iif(const struct sk_buff *skb)
 400{
 401        int iif = skb->dev->ifindex;
 402
 403        /* for local traffic to local address, skb dev is the loopback
 404         * device. Check if there is a dst attached to the skb and if so
 405         * get the real device index.
 406         */
 407        if (unlikely(iif == LOOPBACK_IFINDEX)) {
 408                const struct rt6_info *rt6 = skb_rt6_info(skb);
 409
 410                if (rt6)
 411                        iif = rt6->rt6i_idev->dev->ifindex;
 412        }
 413
 414        return iif;
 415}
 416
 417/*
 418 *      Send an ICMP message in response to a packet in error
 419 */
 420static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 421                       const struct in6_addr *force_saddr)
 422{
 423        struct net *net = dev_net(skb->dev);
 424        struct inet6_dev *idev = NULL;
 425        struct ipv6hdr *hdr = ipv6_hdr(skb);
 426        struct sock *sk;
 427        struct ipv6_pinfo *np;
 428        const struct in6_addr *saddr = NULL;
 429        struct dst_entry *dst;
 430        struct icmp6hdr tmp_hdr;
 431        struct flowi6 fl6;
 432        struct icmpv6_msg msg;
 433        struct sockcm_cookie sockc_unused = {0};
 434        struct ipcm6_cookie ipc6;
 435        int iif = 0;
 436        int addr_type = 0;
 437        int len;
 438        u32 mark = IP6_REPLY_MARK(net, skb->mark);
 439
 440        if ((u8 *)hdr < skb->head ||
 441            (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
 442                return;
 443
 444        /*
 445         *      Make sure we respect the rules
 446         *      i.e. RFC 1885 2.4(e)
 447         *      Rule (e.1) is enforced by not using icmp6_send
 448         *      in any code that processes icmp errors.
 449         */
 450        addr_type = ipv6_addr_type(&hdr->daddr);
 451
 452        if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
 453            ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
 454                saddr = &hdr->daddr;
 455
 456        /*
 457         *      Dest addr check
 458         */
 459
 460        if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
 461                if (type != ICMPV6_PKT_TOOBIG &&
 462                    !(type == ICMPV6_PARAMPROB &&
 463                      code == ICMPV6_UNK_OPTION &&
 464                      (opt_unrec(skb, info))))
 465                        return;
 466
 467                saddr = NULL;
 468        }
 469
 470        addr_type = ipv6_addr_type(&hdr->saddr);
 471
 472        /*
 473         *      Source addr check
 474         */
 475
 476        if (__ipv6_addr_needs_scope_id(addr_type)) {
 477                iif = icmp6_iif(skb);
 478        } else {
 479                dst = skb_dst(skb);
 480                iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
 481        }
 482
 483        /*
 484         *      Must not send error if the source does not uniquely
 485         *      identify a single node (RFC2463 Section 2.4).
 486         *      We check unspecified / multicast addresses here,
 487         *      and anycast addresses will be checked later.
 488         */
 489        if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
 490                net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
 491                                    &hdr->saddr, &hdr->daddr);
 492                return;
 493        }
 494
 495        /*
 496         *      Never answer to a ICMP packet.
 497         */
 498        if (is_ineligible(skb)) {
 499                net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
 500                                    &hdr->saddr, &hdr->daddr);
 501                return;
 502        }
 503
 504        /* Needed by both icmp_global_allow and icmpv6_xmit_lock */
 505        local_bh_disable();
 506
 507        /* Check global sysctl_icmp_msgs_per_sec ratelimit */
 508        if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
 509                goto out_bh_enable;
 510
 511        mip6_addr_swap(skb);
 512
 513        memset(&fl6, 0, sizeof(fl6));
 514        fl6.flowi6_proto = IPPROTO_ICMPV6;
 515        fl6.daddr = hdr->saddr;
 516        if (force_saddr)
 517                saddr = force_saddr;
 518        if (saddr)
 519                fl6.saddr = *saddr;
 520        fl6.flowi6_mark = mark;
 521        fl6.flowi6_oif = iif;
 522        fl6.fl6_icmp_type = type;
 523        fl6.fl6_icmp_code = code;
 524        fl6.flowi6_uid = sock_net_uid(net, NULL);
 525        fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
 526        security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 527
 528        sk = icmpv6_xmit_lock(net);
 529        if (!sk)
 530                goto out_bh_enable;
 531
 532        sk->sk_mark = mark;
 533        np = inet6_sk(sk);
 534
 535        if (!icmpv6_xrlim_allow(sk, type, &fl6))
 536                goto out;
 537
 538        tmp_hdr.icmp6_type = type;
 539        tmp_hdr.icmp6_code = code;
 540        tmp_hdr.icmp6_cksum = 0;
 541        tmp_hdr.icmp6_pointer = htonl(info);
 542
 543        if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
 544                fl6.flowi6_oif = np->mcast_oif;
 545        else if (!fl6.flowi6_oif)
 546                fl6.flowi6_oif = np->ucast_oif;
 547
 548        ipc6.tclass = np->tclass;
 549        fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 550
 551        dst = icmpv6_route_lookup(net, skb, sk, &fl6);
 552        if (IS_ERR(dst))
 553                goto out;
 554
 555        ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 556        ipc6.dontfrag = np->dontfrag;
 557        ipc6.opt = NULL;
 558
 559        msg.skb = skb;
 560        msg.offset = skb_network_offset(skb);
 561        msg.type = type;
 562
 563        len = skb->len - msg.offset;
 564        len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
 565        if (len < 0) {
 566                net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
 567                                    &hdr->saddr, &hdr->daddr);
 568                goto out_dst_release;
 569        }
 570
 571        rcu_read_lock();
 572        idev = __in6_dev_get(skb->dev);
 573
 574        if (ip6_append_data(sk, icmpv6_getfrag, &msg,
 575                            len + sizeof(struct icmp6hdr),
 576                            sizeof(struct icmp6hdr),
 577                            &ipc6, &fl6, (struct rt6_info *)dst,
 578                            MSG_DONTWAIT, &sockc_unused)) {
 579                ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 580                ip6_flush_pending_frames(sk);
 581        } else {
 582                icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
 583                                           len + sizeof(struct icmp6hdr));
 584        }
 585        rcu_read_unlock();
 586out_dst_release:
 587        dst_release(dst);
 588out:
 589        icmpv6_xmit_unlock(sk);
 590out_bh_enable:
 591        local_bh_enable();
 592}
 593
 594/* Slightly more convenient version of icmp6_send.
 595 */
 596void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
 597{
 598        icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
 599        kfree_skb(skb);
 600}
 601
 602/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 603 * if sufficient data bytes are available
 604 * @nhs is the size of the tunnel header(s) :
 605 *  Either an IPv4 header for SIT encap
 606 *         an IPv4 header + GRE header for GRE encap
 607 */
 608int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
 609                               unsigned int data_len)
 610{
 611        struct in6_addr temp_saddr;
 612        struct rt6_info *rt;
 613        struct sk_buff *skb2;
 614        u32 info = 0;
 615
 616        if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
 617                return 1;
 618
 619        /* RFC 4884 (partial) support for ICMP extensions */
 620        if (data_len < 128 || (data_len & 7) || skb->len < data_len)
 621                data_len = 0;
 622
 623        skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
 624
 625        if (!skb2)
 626                return 1;
 627
 628        skb_dst_drop(skb2);
 629        skb_pull(skb2, nhs);
 630        skb_reset_network_header(skb2);
 631
 632        rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
 633                        skb, 0);
 634
 635        if (rt && rt->dst.dev)
 636                skb2->dev = rt->dst.dev;
 637
 638        ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
 639
 640        if (data_len) {
 641                /* RFC 4884 (partial) support :
 642                 * insert 0 padding at the end, before the extensions
 643                 */
 644                __skb_push(skb2, nhs);
 645                skb_reset_network_header(skb2);
 646                memmove(skb2->data, skb2->data + nhs, data_len - nhs);
 647                memset(skb2->data + data_len - nhs, 0, nhs);
 648                /* RFC 4884 4.5 : Length is measured in 64-bit words,
 649                 * and stored in reserved[0]
 650                 */
 651                info = (data_len/8) << 24;
 652        }
 653        if (type == ICMP_TIME_EXCEEDED)
 654                icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
 655                           info, &temp_saddr);
 656        else
 657                icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
 658                           info, &temp_saddr);
 659        if (rt)
 660                ip6_rt_put(rt);
 661
 662        kfree_skb(skb2);
 663
 664        return 0;
 665}
 666EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
 667
 668static void icmpv6_echo_reply(struct sk_buff *skb)
 669{
 670        struct net *net = dev_net(skb->dev);
 671        struct sock *sk;
 672        struct inet6_dev *idev;
 673        struct ipv6_pinfo *np;
 674        const struct in6_addr *saddr = NULL;
 675        struct icmp6hdr *icmph = icmp6_hdr(skb);
 676        struct icmp6hdr tmp_hdr;
 677        struct flowi6 fl6;
 678        struct icmpv6_msg msg;
 679        struct dst_entry *dst;
 680        struct ipcm6_cookie ipc6;
 681        u32 mark = IP6_REPLY_MARK(net, skb->mark);
 682        struct sockcm_cookie sockc_unused = {0};
 683
 684        saddr = &ipv6_hdr(skb)->daddr;
 685
 686        if (!ipv6_unicast_destination(skb) &&
 687            !(net->ipv6.sysctl.anycast_src_echo_reply &&
 688              ipv6_anycast_destination(skb_dst(skb), saddr)))
 689                saddr = NULL;
 690
 691        memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
 692        tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
 693
 694        memset(&fl6, 0, sizeof(fl6));
 695        fl6.flowi6_proto = IPPROTO_ICMPV6;
 696        fl6.daddr = ipv6_hdr(skb)->saddr;
 697        if (saddr)
 698                fl6.saddr = *saddr;
 699        fl6.flowi6_oif = icmp6_iif(skb);
 700        fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
 701        fl6.flowi6_mark = mark;
 702        fl6.flowi6_uid = sock_net_uid(net, NULL);
 703        security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 704
 705        local_bh_disable();
 706        sk = icmpv6_xmit_lock(net);
 707        if (!sk)
 708                goto out_bh_enable;
 709        sk->sk_mark = mark;
 710        np = inet6_sk(sk);
 711
 712        if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
 713                fl6.flowi6_oif = np->mcast_oif;
 714        else if (!fl6.flowi6_oif)
 715                fl6.flowi6_oif = np->ucast_oif;
 716
 717        if (ip6_dst_lookup(net, sk, &dst, &fl6))
 718                goto out;
 719        dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
 720        if (IS_ERR(dst))
 721                goto out;
 722
 723        idev = __in6_dev_get(skb->dev);
 724
 725        msg.skb = skb;
 726        msg.offset = 0;
 727        msg.type = ICMPV6_ECHO_REPLY;
 728
 729        ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 730        ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 731        ipc6.dontfrag = np->dontfrag;
 732        ipc6.opt = NULL;
 733
 734        if (ip6_append_data(sk, icmpv6_getfrag, &msg,
 735                            skb->len + sizeof(struct icmp6hdr),
 736                            sizeof(struct icmp6hdr), &ipc6, &fl6,
 737                            (struct rt6_info *)dst, MSG_DONTWAIT,
 738                            &sockc_unused)) {
 739                __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 740                ip6_flush_pending_frames(sk);
 741        } else {
 742                icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
 743                                           skb->len + sizeof(struct icmp6hdr));
 744        }
 745        dst_release(dst);
 746out:
 747        icmpv6_xmit_unlock(sk);
 748out_bh_enable:
 749        local_bh_enable();
 750}
 751
 752void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 753{
 754        const struct inet6_protocol *ipprot;
 755        int inner_offset;
 756        __be16 frag_off;
 757        u8 nexthdr;
 758        struct net *net = dev_net(skb->dev);
 759
 760        if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 761                goto out;
 762
 763        nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
 764        if (ipv6_ext_hdr(nexthdr)) {
 765                /* now skip over extension headers */
 766                inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
 767                                                &nexthdr, &frag_off);
 768                if (inner_offset < 0)
 769                        goto out;
 770        } else {
 771                inner_offset = sizeof(struct ipv6hdr);
 772        }
 773
 774        /* Checkin header including 8 bytes of inner protocol header. */
 775        if (!pskb_may_pull(skb, inner_offset+8))
 776                goto out;
 777
 778        /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
 779           Without this we will not able f.e. to make source routed
 780           pmtu discovery.
 781           Corresponding argument (opt) to notifiers is already added.
 782           --ANK (980726)
 783         */
 784
 785        ipprot = rcu_dereference(inet6_protos[nexthdr]);
 786        if (ipprot && ipprot->err_handler)
 787                ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
 788
 789        raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
 790        return;
 791
 792out:
 793        __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
 794}
 795
 796/*
 797 *      Handle icmp messages
 798 */
 799
 800static int icmpv6_rcv(struct sk_buff *skb)
 801{
 802        struct net_device *dev = skb->dev;
 803        struct inet6_dev *idev = __in6_dev_get(dev);
 804        const struct in6_addr *saddr, *daddr;
 805        struct icmp6hdr *hdr;
 806        u8 type;
 807        bool success = false;
 808
 809        if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 810                struct sec_path *sp = skb_sec_path(skb);
 811                int nh;
 812
 813                if (!(sp && sp->xvec[sp->len - 1]->props.flags &
 814                                 XFRM_STATE_ICMP))
 815                        goto drop_no_count;
 816
 817                if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
 818                        goto drop_no_count;
 819
 820                nh = skb_network_offset(skb);
 821                skb_set_network_header(skb, sizeof(*hdr));
 822
 823                if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
 824                        goto drop_no_count;
 825
 826                skb_set_network_header(skb, nh);
 827        }
 828
 829        __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
 830
 831        saddr = &ipv6_hdr(skb)->saddr;
 832        daddr = &ipv6_hdr(skb)->daddr;
 833
 834        if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
 835                net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
 836                                    saddr, daddr);
 837                goto csum_error;
 838        }
 839
 840        if (!pskb_pull(skb, sizeof(*hdr)))
 841                goto discard_it;
 842
 843        hdr = icmp6_hdr(skb);
 844
 845        type = hdr->icmp6_type;
 846
 847        ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
 848
 849        switch (type) {
 850        case ICMPV6_ECHO_REQUEST:
 851                icmpv6_echo_reply(skb);
 852                break;
 853
 854        case ICMPV6_ECHO_REPLY:
 855                success = ping_rcv(skb);
 856                break;
 857
 858        case ICMPV6_PKT_TOOBIG:
 859                /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
 860                   standard destination cache. Seems, only "advanced"
 861                   destination cache will allow to solve this problem
 862                   --ANK (980726)
 863                 */
 864                if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 865                        goto discard_it;
 866                hdr = icmp6_hdr(skb);
 867
 868                /* to notify */
 869                /* fall through */
 870        case ICMPV6_DEST_UNREACH:
 871        case ICMPV6_TIME_EXCEED:
 872        case ICMPV6_PARAMPROB:
 873                icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
 874                break;
 875
 876        case NDISC_ROUTER_SOLICITATION:
 877        case NDISC_ROUTER_ADVERTISEMENT:
 878        case NDISC_NEIGHBOUR_SOLICITATION:
 879        case NDISC_NEIGHBOUR_ADVERTISEMENT:
 880        case NDISC_REDIRECT:
 881                ndisc_rcv(skb);
 882                break;
 883
 884        case ICMPV6_MGM_QUERY:
 885                igmp6_event_query(skb);
 886                break;
 887
 888        case ICMPV6_MGM_REPORT:
 889                igmp6_event_report(skb);
 890                break;
 891
 892        case ICMPV6_MGM_REDUCTION:
 893        case ICMPV6_NI_QUERY:
 894        case ICMPV6_NI_REPLY:
 895        case ICMPV6_MLD2_REPORT:
 896        case ICMPV6_DHAAD_REQUEST:
 897        case ICMPV6_DHAAD_REPLY:
 898        case ICMPV6_MOBILE_PREFIX_SOL:
 899        case ICMPV6_MOBILE_PREFIX_ADV:
 900                break;
 901
 902        default:
 903                /* informational */
 904                if (type & ICMPV6_INFOMSG_MASK)
 905                        break;
 906
 907                net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
 908                                    saddr, daddr);
 909
 910                /*
 911                 * error of unknown type.
 912                 * must pass to upper level
 913                 */
 914
 915                icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
 916        }
 917
 918        /* until the v6 path can be better sorted assume failure and
 919         * preserve the status quo behaviour for the rest of the paths to here
 920         */
 921        if (success)
 922                consume_skb(skb);
 923        else
 924                kfree_skb(skb);
 925
 926        return 0;
 927
 928csum_error:
 929        __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
 930discard_it:
 931        __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
 932drop_no_count:
 933        kfree_skb(skb);
 934        return 0;
 935}
 936
 937void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
 938                      u8 type,
 939                      const struct in6_addr *saddr,
 940                      const struct in6_addr *daddr,
 941                      int oif)
 942{
 943        memset(fl6, 0, sizeof(*fl6));
 944        fl6->saddr = *saddr;
 945        fl6->daddr = *daddr;
 946        fl6->flowi6_proto       = IPPROTO_ICMPV6;
 947        fl6->fl6_icmp_type      = type;
 948        fl6->fl6_icmp_code      = 0;
 949        fl6->flowi6_oif         = oif;
 950        security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
 951}
 952
 953static int __net_init icmpv6_sk_init(struct net *net)
 954{
 955        struct sock *sk;
 956        int err, i, j;
 957
 958        net->ipv6.icmp_sk =
 959                kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
 960        if (!net->ipv6.icmp_sk)
 961                return -ENOMEM;
 962
 963        for_each_possible_cpu(i) {
 964                err = inet_ctl_sock_create(&sk, PF_INET6,
 965                                           SOCK_RAW, IPPROTO_ICMPV6, net);
 966                if (err < 0) {
 967                        pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
 968                               err);
 969                        goto fail;
 970                }
 971
 972                net->ipv6.icmp_sk[i] = sk;
 973
 974                /* Enough space for 2 64K ICMP packets, including
 975                 * sk_buff struct overhead.
 976                 */
 977                sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
 978        }
 979        return 0;
 980
 981 fail:
 982        for (j = 0; j < i; j++)
 983                inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
 984        kfree(net->ipv6.icmp_sk);
 985        return err;
 986}
 987
 988static void __net_exit icmpv6_sk_exit(struct net *net)
 989{
 990        int i;
 991
 992        for_each_possible_cpu(i) {
 993                inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
 994        }
 995        kfree(net->ipv6.icmp_sk);
 996}
 997
 998static struct pernet_operations icmpv6_sk_ops = {
 999        .init = icmpv6_sk_init,
1000        .exit = icmpv6_sk_exit,
1001};
1002
1003int __init icmpv6_init(void)
1004{
1005        int err;
1006
1007        err = register_pernet_subsys(&icmpv6_sk_ops);
1008        if (err < 0)
1009                return err;
1010
1011        err = -EAGAIN;
1012        if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1013                goto fail;
1014
1015        err = inet6_register_icmp_sender(icmp6_send);
1016        if (err)
1017                goto sender_reg_err;
1018        return 0;
1019
1020sender_reg_err:
1021        inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1022fail:
1023        pr_err("Failed to register ICMP6 protocol\n");
1024        unregister_pernet_subsys(&icmpv6_sk_ops);
1025        return err;
1026}
1027
1028void icmpv6_cleanup(void)
1029{
1030        inet6_unregister_icmp_sender(icmp6_send);
1031        unregister_pernet_subsys(&icmpv6_sk_ops);
1032        inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1033}
1034
1035
/*
 * Translation table for Destination Unreachable messages, indexed by the
 * ICMPv6 code.  Each entry gives the errno value to report and whether the
 * error is to be treated as fatal.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1069
1070int icmpv6_err_convert(u8 type, u8 code, int *err)
1071{
1072        int fatal = 0;
1073
1074        *err = EPROTO;
1075
1076        switch (type) {
1077        case ICMPV6_DEST_UNREACH:
1078                fatal = 1;
1079                if (code < ARRAY_SIZE(tab_unreach)) {
1080                        *err  = tab_unreach[code].err;
1081                        fatal = tab_unreach[code].fatal;
1082                }
1083                break;
1084
1085        case ICMPV6_PKT_TOOBIG:
1086                *err = EMSGSIZE;
1087                break;
1088
1089        case ICMPV6_PARAMPROB:
1090                *err = EPROTO;
1091                fatal = 1;
1092                break;
1093
1094        case ICMPV6_TIME_EXCEED:
1095                *err = EHOSTUNREACH;
1096                break;
1097        }
1098
1099        return fatal;
1100}
1101EXPORT_SYMBOL(icmpv6_err_convert);
1102
1103#ifdef CONFIG_SYSCTL
1104static struct ctl_table ipv6_icmp_table_template[] = {
1105        {
1106                .procname       = "ratelimit",
1107                .data           = &init_net.ipv6.sysctl.icmpv6_time,
1108                .maxlen         = sizeof(int),
1109                .mode           = 0644,
1110                .proc_handler   = proc_dointvec_ms_jiffies,
1111        },
1112        { },
1113};
1114
1115struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1116{
1117        struct ctl_table *table;
1118
1119        table = kmemdup(ipv6_icmp_table_template,
1120                        sizeof(ipv6_icmp_table_template),
1121                        GFP_KERNEL);
1122
1123        if (table)
1124                table[0].data = &net->ipv6.sysctl.icmpv6_time;
1125
1126        return table;
1127}
1128#endif
1129