linux/net/ipv6/ip6_output.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *      IPv6 output functions
   4 *      Linux INET6 implementation
   5 *
   6 *      Authors:
   7 *      Pedro Roque             <roque@di.fc.ul.pt>
   8 *
   9 *      Based on linux/net/ipv4/ip_output.c
  10 *
  11 *      Changes:
  12 *      A.N.Kuznetsov   :       arithmetic in fragmentation.
  13 *                              extension headers are implemented.
  14 *                              route changes now work.
  15 *                              ip6_forward does not confuse sniffers.
  16 *                              etc.
  17 *
  18 *      H. von Brand    :       Added missing #include <linux/string.h>
  19 *      Imran Patel     :       frag id should be in NBO
  20 *      Kazunori MIYAZAWA @USAGI
  21 *                      :       add ip6_append_data and related functions
  22 *                              for datagram xmit
  23 */
  24
  25#include <linux/errno.h>
  26#include <linux/kernel.h>
  27#include <linux/string.h>
  28#include <linux/socket.h>
  29#include <linux/net.h>
  30#include <linux/netdevice.h>
  31#include <linux/if_arp.h>
  32#include <linux/in6.h>
  33#include <linux/tcp.h>
  34#include <linux/route.h>
  35#include <linux/module.h>
  36#include <linux/slab.h>
  37
  38#include <linux/bpf-cgroup.h>
  39#include <linux/netfilter.h>
  40#include <linux/netfilter_ipv6.h>
  41
  42#include <net/sock.h>
  43#include <net/snmp.h>
  44
  45#include <net/ipv6.h>
  46#include <net/ndisc.h>
  47#include <net/protocol.h>
  48#include <net/ip6_route.h>
  49#include <net/addrconf.h>
  50#include <net/rawv6.h>
  51#include <net/icmp.h>
  52#include <net/xfrm.h>
  53#include <net/checksum.h>
  54#include <linux/mroute6.h>
  55#include <net/l3mdev.h>
  56#include <net/lwtunnel.h>
  57#include <net/ip_tunnels.h>
  58
  59static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
  60{
  61        struct dst_entry *dst = skb_dst(skb);
  62        struct net_device *dev = dst->dev;
  63        const struct in6_addr *nexthop;
  64        struct neighbour *neigh;
  65        int ret;
  66
  67        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
  68                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
  69
  70                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
  71                    ((mroute6_is_socket(net, skb) &&
  72                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
  73                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
  74                                         &ipv6_hdr(skb)->saddr))) {
  75                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
  76
  77                        /* Do not check for IFF_ALLMULTI; multicast routing
  78                           is not supported in any case.
  79                         */
  80                        if (newskb)
  81                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
  82                                        net, sk, newskb, NULL, newskb->dev,
  83                                        dev_loopback_xmit);
  84
  85                        if (ipv6_hdr(skb)->hop_limit == 0) {
  86                                IP6_INC_STATS(net, idev,
  87                                              IPSTATS_MIB_OUTDISCARDS);
  88                                kfree_skb(skb);
  89                                return 0;
  90                        }
  91                }
  92
  93                IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
  94
  95                if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
  96                    IPV6_ADDR_SCOPE_NODELOCAL &&
  97                    !(dev->flags & IFF_LOOPBACK)) {
  98                        kfree_skb(skb);
  99                        return 0;
 100                }
 101        }
 102
 103        if (lwtunnel_xmit_redirect(dst->lwtstate)) {
 104                int res = lwtunnel_xmit(skb);
 105
 106                if (res < 0 || res == LWTUNNEL_XMIT_DONE)
 107                        return res;
 108        }
 109
 110        rcu_read_lock_bh();
 111        nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
 112        neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
 113        if (unlikely(!neigh))
 114                neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
 115        if (!IS_ERR(neigh)) {
 116                sock_confirm_neigh(skb, neigh);
 117                ret = neigh_output(neigh, skb, false);
 118                rcu_read_unlock_bh();
 119                return ret;
 120        }
 121        rcu_read_unlock_bh();
 122
 123        IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 124        kfree_skb(skb);
 125        return -EINVAL;
 126}
 127
 128static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 129{
 130#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 131        /* Policy lookup after SNAT yielded a new policy */
 132        if (skb_dst(skb)->xfrm) {
 133                IPCB(skb)->flags |= IPSKB_REROUTED;
 134                return dst_output(net, sk, skb);
 135        }
 136#endif
 137
 138        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 139            dst_allfrag(skb_dst(skb)) ||
 140            (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
 141                return ip6_fragment(net, sk, skb, ip6_finish_output2);
 142        else
 143                return ip6_finish_output2(net, sk, skb);
 144}
 145
 146static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 147{
 148        int ret;
 149
 150        ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
 151        switch (ret) {
 152        case NET_XMIT_SUCCESS:
 153                return __ip6_finish_output(net, sk, skb);
 154        case NET_XMIT_CN:
 155                return __ip6_finish_output(net, sk, skb) ? : ret;
 156        default:
 157                kfree_skb(skb);
 158                return ret;
 159        }
 160}
 161
 162int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 163{
 164        struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
 165        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 166
 167        skb->protocol = htons(ETH_P_IPV6);
 168        skb->dev = dev;
 169
 170        if (unlikely(idev->cnf.disable_ipv6)) {
 171                IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 172                kfree_skb(skb);
 173                return 0;
 174        }
 175
 176        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
 177                            net, sk, skb, indev, dev,
 178                            ip6_finish_output,
 179                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 180}
 181
 182bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
 183{
 184        if (!np->autoflowlabel_set)
 185                return ip6_default_np_autolabel(net);
 186        else
 187                return np->autoflowlabel;
 188}
 189
 190/*
 191 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 192 * Note : socket lock is not held for SYNACK packets, but might be modified
 193 * by calls to skb_set_owner_w() and ipv6_local_error(),
 194 * which are using proper atomic operations or spinlocks.
 195 */
 196int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 197             __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
 198{
 199        struct net *net = sock_net(sk);
 200        const struct ipv6_pinfo *np = inet6_sk(sk);
 201        struct in6_addr *first_hop = &fl6->daddr;
 202        struct dst_entry *dst = skb_dst(skb);
 203        unsigned int head_room;
 204        struct ipv6hdr *hdr;
 205        u8  proto = fl6->flowi6_proto;
 206        int seg_len = skb->len;
 207        int hlimit = -1;
 208        u32 mtu;
 209
 210        head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
 211        if (opt)
 212                head_room += opt->opt_nflen + opt->opt_flen;
 213
 214        if (unlikely(skb_headroom(skb) < head_room)) {
 215                struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 216                if (!skb2) {
 217                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 218                                      IPSTATS_MIB_OUTDISCARDS);
 219                        kfree_skb(skb);
 220                        return -ENOBUFS;
 221                }
 222                if (skb->sk)
 223                        skb_set_owner_w(skb2, skb->sk);
 224                consume_skb(skb);
 225                skb = skb2;
 226        }
 227
 228        if (opt) {
 229                seg_len += opt->opt_nflen + opt->opt_flen;
 230
 231                if (opt->opt_flen)
 232                        ipv6_push_frag_opts(skb, opt, &proto);
 233
 234                if (opt->opt_nflen)
 235                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
 236                                             &fl6->saddr);
 237        }
 238
 239        skb_push(skb, sizeof(struct ipv6hdr));
 240        skb_reset_network_header(skb);
 241        hdr = ipv6_hdr(skb);
 242
 243        /*
 244         *      Fill in the IPv6 header
 245         */
 246        if (np)
 247                hlimit = np->hop_limit;
 248        if (hlimit < 0)
 249                hlimit = ip6_dst_hoplimit(dst);
 250
 251        ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
 252                                ip6_autoflowlabel(net, np), fl6));
 253
 254        hdr->payload_len = htons(seg_len);
 255        hdr->nexthdr = proto;
 256        hdr->hop_limit = hlimit;
 257
 258        hdr->saddr = fl6->saddr;
 259        hdr->daddr = *first_hop;
 260
 261        skb->protocol = htons(ETH_P_IPV6);
 262        skb->priority = priority;
 263        skb->mark = mark;
 264
 265        mtu = dst_mtu(dst);
 266        if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
 267                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
 268                              IPSTATS_MIB_OUT, skb->len);
 269
 270                /* if egress device is enslaved to an L3 master device pass the
 271                 * skb to its handler for processing
 272                 */
 273                skb = l3mdev_ip6_out((struct sock *)sk, skb);
 274                if (unlikely(!skb))
 275                        return 0;
 276
 277                /* hooks should never assume socket lock is held.
 278                 * we promote our socket to non const
 279                 */
 280                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 281                               net, (struct sock *)sk, skb, NULL, dst->dev,
 282                               dst_output);
 283        }
 284
 285        skb->dev = dst->dev;
 286        /* ipv6_local_error() does not require socket lock,
 287         * we promote our socket to non const
 288         */
 289        ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
 290
 291        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
 292        kfree_skb(skb);
 293        return -EMSGSIZE;
 294}
 295EXPORT_SYMBOL(ip6_xmit);
 296
 297static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 298{
 299        struct ip6_ra_chain *ra;
 300        struct sock *last = NULL;
 301
 302        read_lock(&ip6_ra_lock);
 303        for (ra = ip6_ra_chain; ra; ra = ra->next) {
 304                struct sock *sk = ra->sk;
 305                if (sk && ra->sel == sel &&
 306                    (!sk->sk_bound_dev_if ||
 307                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
 308                        struct ipv6_pinfo *np = inet6_sk(sk);
 309
 310                        if (np && np->rtalert_isolate &&
 311                            !net_eq(sock_net(sk), dev_net(skb->dev))) {
 312                                continue;
 313                        }
 314                        if (last) {
 315                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 316                                if (skb2)
 317                                        rawv6_rcv(last, skb2);
 318                        }
 319                        last = sk;
 320                }
 321        }
 322
 323        if (last) {
 324                rawv6_rcv(last, skb);
 325                read_unlock(&ip6_ra_lock);
 326                return 1;
 327        }
 328        read_unlock(&ip6_ra_lock);
 329        return 0;
 330}
 331
 332static int ip6_forward_proxy_check(struct sk_buff *skb)
 333{
 334        struct ipv6hdr *hdr = ipv6_hdr(skb);
 335        u8 nexthdr = hdr->nexthdr;
 336        __be16 frag_off;
 337        int offset;
 338
 339        if (ipv6_ext_hdr(nexthdr)) {
 340                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
 341                if (offset < 0)
 342                        return 0;
 343        } else
 344                offset = sizeof(struct ipv6hdr);
 345
 346        if (nexthdr == IPPROTO_ICMPV6) {
 347                struct icmp6hdr *icmp6;
 348
 349                if (!pskb_may_pull(skb, (skb_network_header(skb) +
 350                                         offset + 1 - skb->data)))
 351                        return 0;
 352
 353                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 354
 355                switch (icmp6->icmp6_type) {
 356                case NDISC_ROUTER_SOLICITATION:
 357                case NDISC_ROUTER_ADVERTISEMENT:
 358                case NDISC_NEIGHBOUR_SOLICITATION:
 359                case NDISC_NEIGHBOUR_ADVERTISEMENT:
 360                case NDISC_REDIRECT:
 361                        /* For reaction involving unicast neighbor discovery
 362                         * message destined to the proxied address, pass it to
 363                         * input function.
 364                         */
 365                        return 1;
 366                default:
 367                        break;
 368                }
 369        }
 370
 371        /*
 372         * The proxying router can't forward traffic sent to a link-local
 373         * address, so signal the sender and discard the packet. This
 374         * behavior is clarified by the MIPv6 specification.
 375         */
 376        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
 377                dst_link_failure(skb);
 378                return -1;
 379        }
 380
 381        return 0;
 382}
 383
 384static inline int ip6_forward_finish(struct net *net, struct sock *sk,
 385                                     struct sk_buff *skb)
 386{
 387        struct dst_entry *dst = skb_dst(skb);
 388
 389        __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 390        __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
 391
 392#ifdef CONFIG_NET_SWITCHDEV
 393        if (skb->offload_l3_fwd_mark) {
 394                consume_skb(skb);
 395                return 0;
 396        }
 397#endif
 398
 399        skb->tstamp = 0;
 400        return dst_output(net, sk, skb);
 401}
 402
 403static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 404{
 405        if (skb->len <= mtu)
 406                return false;
 407
 408        /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
 409        if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
 410                return true;
 411
 412        if (skb->ignore_df)
 413                return false;
 414
 415        if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
 416                return false;
 417
 418        return true;
 419}
 420
 421int ip6_forward(struct sk_buff *skb)
 422{
 423        struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
 424        struct dst_entry *dst = skb_dst(skb);
 425        struct ipv6hdr *hdr = ipv6_hdr(skb);
 426        struct inet6_skb_parm *opt = IP6CB(skb);
 427        struct net *net = dev_net(dst->dev);
 428        u32 mtu;
 429
 430        if (net->ipv6.devconf_all->forwarding == 0)
 431                goto error;
 432
 433        if (skb->pkt_type != PACKET_HOST)
 434                goto drop;
 435
 436        if (unlikely(skb->sk))
 437                goto drop;
 438
 439        if (skb_warn_if_lro(skb))
 440                goto drop;
 441
 442        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
 443                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
 444                goto drop;
 445        }
 446
 447        skb_forward_csum(skb);
 448
 449        /*
 450         *      We DO NOT make any processing on
 451         *      RA packets, pushing them to user level AS IS
 452         *      without ane WARRANTY that application will be able
 453         *      to interpret them. The reason is that we
 454         *      cannot make anything clever here.
 455         *
 456         *      We are not end-node, so that if packet contains
 457         *      AH/ESP, we cannot make anything.
 458         *      Defragmentation also would be mistake, RA packets
 459         *      cannot be fragmented, because there is no warranty
 460         *      that different fragments will go along one path. --ANK
 461         */
 462        if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
 463                if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
 464                        return 0;
 465        }
 466
 467        /*
 468         *      check and decrement ttl
 469         */
 470        if (hdr->hop_limit <= 1) {
 471                /* Force OUTPUT device used as source address */
 472                skb->dev = dst->dev;
 473                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
 474                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 475
 476                kfree_skb(skb);
 477                return -ETIMEDOUT;
 478        }
 479
 480        /* XXX: idev->cnf.proxy_ndp? */
 481        if (net->ipv6.devconf_all->proxy_ndp &&
 482            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
 483                int proxied = ip6_forward_proxy_check(skb);
 484                if (proxied > 0)
 485                        return ip6_input(skb);
 486                else if (proxied < 0) {
 487                        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
 488                        goto drop;
 489                }
 490        }
 491
 492        if (!xfrm6_route_forward(skb)) {
 493                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
 494                goto drop;
 495        }
 496        dst = skb_dst(skb);
 497
 498        /* IPv6 specs say nothing about it, but it is clear that we cannot
 499           send redirects to source routed frames.
 500           We don't send redirects to frames decapsulated from IPsec.
 501         */
 502        if (IP6CB(skb)->iif == dst->dev->ifindex &&
 503            opt->srcrt == 0 && !skb_sec_path(skb)) {
 504                struct in6_addr *target = NULL;
 505                struct inet_peer *peer;
 506                struct rt6_info *rt;
 507
 508                /*
 509                 *      incoming and outgoing devices are the same
 510                 *      send a redirect.
 511                 */
 512
 513                rt = (struct rt6_info *) dst;
 514                if (rt->rt6i_flags & RTF_GATEWAY)
 515                        target = &rt->rt6i_gateway;
 516                else
 517                        target = &hdr->daddr;
 518
 519                peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
 520
 521                /* Limit redirects both by destination (here)
 522                   and by source (inside ndisc_send_redirect)
 523                 */
 524                if (inet_peer_xrlim_allow(peer, 1*HZ))
 525                        ndisc_send_redirect(skb, target);
 526                if (peer)
 527                        inet_putpeer(peer);
 528        } else {
 529                int addrtype = ipv6_addr_type(&hdr->saddr);
 530
 531                /* This check is security critical. */
 532                if (addrtype == IPV6_ADDR_ANY ||
 533                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
 534                        goto error;
 535                if (addrtype & IPV6_ADDR_LINKLOCAL) {
 536                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
 537                                    ICMPV6_NOT_NEIGHBOUR, 0);
 538                        goto error;
 539                }
 540        }
 541
 542        mtu = ip6_dst_mtu_forward(dst);
 543        if (mtu < IPV6_MIN_MTU)
 544                mtu = IPV6_MIN_MTU;
 545
 546        if (ip6_pkt_too_big(skb, mtu)) {
 547                /* Again, force OUTPUT device used as source address */
 548                skb->dev = dst->dev;
 549                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 550                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
 551                __IP6_INC_STATS(net, ip6_dst_idev(dst),
 552                                IPSTATS_MIB_FRAGFAILS);
 553                kfree_skb(skb);
 554                return -EMSGSIZE;
 555        }
 556
 557        if (skb_cow(skb, dst->dev->hard_header_len)) {
 558                __IP6_INC_STATS(net, ip6_dst_idev(dst),
 559                                IPSTATS_MIB_OUTDISCARDS);
 560                goto drop;
 561        }
 562
 563        hdr = ipv6_hdr(skb);
 564
 565        /* Mangling hops number delayed to point after skb COW */
 566
 567        hdr->hop_limit--;
 568
 569        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
 570                       net, NULL, skb, skb->dev, dst->dev,
 571                       ip6_forward_finish);
 572
 573error:
 574        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
 575drop:
 576        kfree_skb(skb);
 577        return -EINVAL;
 578}
 579
 580static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 581{
 582        to->pkt_type = from->pkt_type;
 583        to->priority = from->priority;
 584        to->protocol = from->protocol;
 585        skb_dst_drop(to);
 586        skb_dst_set(to, dst_clone(skb_dst(from)));
 587        to->dev = from->dev;
 588        to->mark = from->mark;
 589
 590        skb_copy_hash(to, from);
 591
 592#ifdef CONFIG_NET_SCHED
 593        to->tc_index = from->tc_index;
 594#endif
 595        nf_copy(to, from);
 596        skb_ext_copy(to, from);
 597        skb_copy_secmark(to, from);
 598}
 599
 600int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
 601                      u8 nexthdr, __be32 frag_id,
 602                      struct ip6_fraglist_iter *iter)
 603{
 604        unsigned int first_len;
 605        struct frag_hdr *fh;
 606
 607        /* BUILD HEADER */
 608        *prevhdr = NEXTHDR_FRAGMENT;
 609        iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 610        if (!iter->tmp_hdr)
 611                return -ENOMEM;
 612
 613        iter->frag = skb_shinfo(skb)->frag_list;
 614        skb_frag_list_init(skb);
 615
 616        iter->offset = 0;
 617        iter->hlen = hlen;
 618        iter->frag_id = frag_id;
 619        iter->nexthdr = nexthdr;
 620
 621        __skb_pull(skb, hlen);
 622        fh = __skb_push(skb, sizeof(struct frag_hdr));
 623        __skb_push(skb, hlen);
 624        skb_reset_network_header(skb);
 625        memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
 626
 627        fh->nexthdr = nexthdr;
 628        fh->reserved = 0;
 629        fh->frag_off = htons(IP6_MF);
 630        fh->identification = frag_id;
 631
 632        first_len = skb_pagelen(skb);
 633        skb->data_len = first_len - skb_headlen(skb);
 634        skb->len = first_len;
 635        ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
 636
 637        return 0;
 638}
 639EXPORT_SYMBOL(ip6_fraglist_init);
 640
 641void ip6_fraglist_prepare(struct sk_buff *skb,
 642                          struct ip6_fraglist_iter *iter)
 643{
 644        struct sk_buff *frag = iter->frag;
 645        unsigned int hlen = iter->hlen;
 646        struct frag_hdr *fh;
 647
 648        frag->ip_summed = CHECKSUM_NONE;
 649        skb_reset_transport_header(frag);
 650        fh = __skb_push(frag, sizeof(struct frag_hdr));
 651        __skb_push(frag, hlen);
 652        skb_reset_network_header(frag);
 653        memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
 654        iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
 655        fh->nexthdr = iter->nexthdr;
 656        fh->reserved = 0;
 657        fh->frag_off = htons(iter->offset);
 658        if (frag->next)
 659                fh->frag_off |= htons(IP6_MF);
 660        fh->identification = iter->frag_id;
 661        ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
 662        ip6_copy_metadata(frag, skb);
 663}
 664EXPORT_SYMBOL(ip6_fraglist_prepare);
 665
 666void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
 667                   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
 668                   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
 669{
 670        state->prevhdr = prevhdr;
 671        state->nexthdr = nexthdr;
 672        state->frag_id = frag_id;
 673
 674        state->hlen = hlen;
 675        state->mtu = mtu;
 676
 677        state->left = skb->len - hlen;  /* Space per frame */
 678        state->ptr = hlen;              /* Where to start from */
 679
 680        state->hroom = hdr_room;
 681        state->troom = needed_tailroom;
 682
 683        state->offset = 0;
 684}
 685EXPORT_SYMBOL(ip6_frag_init);
 686
 687struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
 688{
 689        u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
 690        struct sk_buff *frag;
 691        struct frag_hdr *fh;
 692        unsigned int len;
 693
 694        len = state->left;
 695        /* IF: it doesn't fit, use 'mtu' - the data space left */
 696        if (len > state->mtu)
 697                len = state->mtu;
 698        /* IF: we are not sending up to and including the packet end
 699           then align the next start on an eight byte boundary */
 700        if (len < state->left)
 701                len &= ~7;
 702
 703        /* Allocate buffer */
 704        frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
 705                         state->hroom + state->troom, GFP_ATOMIC);
 706        if (!frag)
 707                return ERR_PTR(-ENOMEM);
 708
 709        /*
 710         *      Set up data on packet
 711         */
 712
 713        ip6_copy_metadata(frag, skb);
 714        skb_reserve(frag, state->hroom);
 715        skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
 716        skb_reset_network_header(frag);
 717        fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
 718        frag->transport_header = (frag->network_header + state->hlen +
 719                                  sizeof(struct frag_hdr));
 720
 721        /*
 722         *      Charge the memory for the fragment to any owner
 723         *      it might possess
 724         */
 725        if (skb->sk)
 726                skb_set_owner_w(frag, skb->sk);
 727
 728        /*
 729         *      Copy the packet header into the new buffer.
 730         */
 731        skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
 732
 733        fragnexthdr_offset = skb_network_header(frag);
 734        fragnexthdr_offset += prevhdr - skb_network_header(skb);
 735        *fragnexthdr_offset = NEXTHDR_FRAGMENT;
 736
 737        /*
 738         *      Build fragment header.
 739         */
 740        fh->nexthdr = state->nexthdr;
 741        fh->reserved = 0;
 742        fh->identification = state->frag_id;
 743
 744        /*
 745         *      Copy a block of the IP datagram.
 746         */
 747        BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
 748                             len));
 749        state->left -= len;
 750
 751        fh->frag_off = htons(state->offset);
 752        if (state->left > 0)
 753                fh->frag_off |= htons(IP6_MF);
 754        ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
 755
 756        state->ptr += len;
 757        state->offset += len;
 758
 759        return frag;
 760}
 761EXPORT_SYMBOL(ip6_frag_next);
 762
 763int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 764                 int (*output)(struct net *, struct sock *, struct sk_buff *))
 765{
 766        struct sk_buff *frag;
 767        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 768        struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
 769                                inet6_sk(skb->sk) : NULL;
 770        struct ip6_frag_state state;
 771        unsigned int mtu, hlen, nexthdr_offset;
 772        ktime_t tstamp = skb->tstamp;
 773        int hroom, err = 0;
 774        __be32 frag_id;
 775        u8 *prevhdr, nexthdr = 0;
 776
 777        err = ip6_find_1stfragopt(skb, &prevhdr);
 778        if (err < 0)
 779                goto fail;
 780        hlen = err;
 781        nexthdr = *prevhdr;
 782        nexthdr_offset = prevhdr - skb_network_header(skb);
 783
 784        mtu = ip6_skb_dst_mtu(skb);
 785
 786        /* We must not fragment if the socket is set to force MTU discovery
 787         * or if the skb it not generated by a local socket.
 788         */
 789        if (unlikely(!skb->ignore_df && skb->len > mtu))
 790                goto fail_toobig;
 791
 792        if (IP6CB(skb)->frag_max_size) {
 793                if (IP6CB(skb)->frag_max_size > mtu)
 794                        goto fail_toobig;
 795
 796                /* don't send fragments larger than what we received */
 797                mtu = IP6CB(skb)->frag_max_size;
 798                if (mtu < IPV6_MIN_MTU)
 799                        mtu = IPV6_MIN_MTU;
 800        }
 801
 802        if (np && np->frag_size < mtu) {
 803                if (np->frag_size)
 804                        mtu = np->frag_size;
 805        }
 806        if (mtu < hlen + sizeof(struct frag_hdr) + 8)
 807                goto fail_toobig;
 808        mtu -= hlen + sizeof(struct frag_hdr);
 809
 810        frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
 811                                    &ipv6_hdr(skb)->saddr);
 812
 813        if (skb->ip_summed == CHECKSUM_PARTIAL &&
 814            (err = skb_checksum_help(skb)))
 815                goto fail;
 816
 817        prevhdr = skb_network_header(skb) + nexthdr_offset;
 818        hroom = LL_RESERVED_SPACE(rt->dst.dev);
 819        if (skb_has_frag_list(skb)) {
 820                unsigned int first_len = skb_pagelen(skb);
 821                struct ip6_fraglist_iter iter;
 822                struct sk_buff *frag2;
 823
 824                if (first_len - hlen > mtu ||
 825                    ((first_len - hlen) & 7) ||
 826                    skb_cloned(skb) ||
 827                    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
 828                        goto slow_path;
 829
 830                skb_walk_frags(skb, frag) {
 831                        /* Correct geometry. */
 832                        if (frag->len > mtu ||
 833                            ((frag->len & 7) && frag->next) ||
 834                            skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
 835                                goto slow_path_clean;
 836
 837                        /* Partially cloned skb? */
 838                        if (skb_shared(frag))
 839                                goto slow_path_clean;
 840
 841                        BUG_ON(frag->sk);
 842                        if (skb->sk) {
 843                                frag->sk = skb->sk;
 844                                frag->destructor = sock_wfree;
 845                        }
 846                        skb->truesize -= frag->truesize;
 847                }
 848
 849                err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
 850                                        &iter);
 851                if (err < 0)
 852                        goto fail;
 853
 854                for (;;) {
 855                        /* Prepare header of the next frame,
 856                         * before previous one went down. */
 857                        if (iter.frag)
 858                                ip6_fraglist_prepare(skb, &iter);
 859
 860                        skb->tstamp = tstamp;
 861                        err = output(net, sk, skb);
 862                        if (!err)
 863                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 864                                              IPSTATS_MIB_FRAGCREATES);
 865
 866                        if (err || !iter.frag)
 867                                break;
 868
 869                        skb = ip6_fraglist_next(&iter);
 870                }
 871
 872                kfree(iter.tmp_hdr);
 873
 874                if (err == 0) {
 875                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 876                                      IPSTATS_MIB_FRAGOKS);
 877                        return 0;
 878                }
 879
 880                kfree_skb_list(iter.frag);
 881
 882                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 883                              IPSTATS_MIB_FRAGFAILS);
 884                return err;
 885
 886slow_path_clean:
 887                skb_walk_frags(skb, frag2) {
 888                        if (frag2 == frag)
 889                                break;
 890                        frag2->sk = NULL;
 891                        frag2->destructor = NULL;
 892                        skb->truesize += frag2->truesize;
 893                }
 894        }
 895
 896slow_path:
 897        /*
 898         *      Fragment the datagram.
 899         */
 900
 901        ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
 902                      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
 903                      &state);
 904
 905        /*
 906         *      Keep copying data until we run out.
 907         */
 908
 909        while (state.left > 0) {
 910                frag = ip6_frag_next(skb, &state);
 911                if (IS_ERR(frag)) {
 912                        err = PTR_ERR(frag);
 913                        goto fail;
 914                }
 915
 916                /*
 917                 *      Put this fragment into the sending queue.
 918                 */
 919                frag->tstamp = tstamp;
 920                err = output(net, sk, frag);
 921                if (err)
 922                        goto fail;
 923
 924                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 925                              IPSTATS_MIB_FRAGCREATES);
 926        }
 927        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 928                      IPSTATS_MIB_FRAGOKS);
 929        consume_skb(skb);
 930        return err;
 931
 932fail_toobig:
 933        if (skb->sk && dst_allfrag(skb_dst(skb)))
 934                sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
 935
 936        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 937        err = -EMSGSIZE;
 938
 939fail:
 940        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 941                      IPSTATS_MIB_FRAGFAILS);
 942        kfree_skb(skb);
 943        return err;
 944}
 945
 946static inline int ip6_rt_check(const struct rt6key *rt_key,
 947                               const struct in6_addr *fl_addr,
 948                               const struct in6_addr *addr_cache)
 949{
 950        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 951                (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
 952}
 953
 954static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 955                                          struct dst_entry *dst,
 956                                          const struct flowi6 *fl6)
 957{
 958        struct ipv6_pinfo *np = inet6_sk(sk);
 959        struct rt6_info *rt;
 960
 961        if (!dst)
 962                goto out;
 963
 964        if (dst->ops->family != AF_INET6) {
 965                dst_release(dst);
 966                return NULL;
 967        }
 968
 969        rt = (struct rt6_info *)dst;
 970        /* Yes, checking route validity in not connected
 971         * case is not very simple. Take into account,
 972         * that we do not support routing by source, TOS,
 973         * and MSG_DONTROUTE            --ANK (980726)
 974         *
 975         * 1. ip6_rt_check(): If route was host route,
 976         *    check that cached destination is current.
 977         *    If it is network route, we still may
 978         *    check its validity using saved pointer
 979         *    to the last used address: daddr_cache.
 980         *    We do not want to save whole address now,
 981         *    (because main consumer of this service
 982         *    is tcp, which has not this problem),
 983         *    so that the last trick works only on connected
 984         *    sockets.
 985         * 2. oif also should be the same.
 986         */
 987        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
 988#ifdef CONFIG_IPV6_SUBTREES
 989            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
 990#endif
 991           (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
 992              (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
 993                dst_release(dst);
 994                dst = NULL;
 995        }
 996
 997out:
 998        return dst;
 999}
1000
1001static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
1002                               struct dst_entry **dst, struct flowi6 *fl6)
1003{
1004#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1005        struct neighbour *n;
1006        struct rt6_info *rt;
1007#endif
1008        int err;
1009        int flags = 0;
1010
1011        /* The correct way to handle this would be to do
1012         * ip6_route_get_saddr, and then ip6_route_output; however,
1013         * the route-specific preferred source forces the
1014         * ip6_route_output call _before_ ip6_route_get_saddr.
1015         *
1016         * In source specific routing (no src=any default route),
1017         * ip6_route_output will fail given src=any saddr, though, so
1018         * that's why we try it again later.
1019         */
1020        if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
1021                struct fib6_info *from;
1022                struct rt6_info *rt;
1023                bool had_dst = *dst != NULL;
1024
1025                if (!had_dst)
1026                        *dst = ip6_route_output(net, sk, fl6);
1027                rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1028
1029                rcu_read_lock();
1030                from = rt ? rcu_dereference(rt->from) : NULL;
1031                err = ip6_route_get_saddr(net, from, &fl6->daddr,
1032                                          sk ? inet6_sk(sk)->srcprefs : 0,
1033                                          &fl6->saddr);
1034                rcu_read_unlock();
1035
1036                if (err)
1037                        goto out_err_release;
1038
1039                /* If we had an erroneous initial result, pretend it
1040                 * never existed and let the SA-enabled version take
1041                 * over.
1042                 */
1043                if (!had_dst && (*dst)->error) {
1044                        dst_release(*dst);
1045                        *dst = NULL;
1046                }
1047
1048                if (fl6->flowi6_oif)
1049                        flags |= RT6_LOOKUP_F_IFACE;
1050        }
1051
1052        if (!*dst)
1053                *dst = ip6_route_output_flags(net, sk, fl6, flags);
1054
1055        err = (*dst)->error;
1056        if (err)
1057                goto out_err_release;
1058
1059#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1060        /*
1061         * Here if the dst entry we've looked up
1062         * has a neighbour entry that is in the INCOMPLETE
1063         * state and the src address from the flow is
1064         * marked as OPTIMISTIC, we release the found
1065         * dst entry and replace it instead with the
1066         * dst entry of the nexthop router
1067         */
1068        rt = (struct rt6_info *) *dst;
1069        rcu_read_lock_bh();
1070        n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1071                                      rt6_nexthop(rt, &fl6->daddr));
1072        err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1073        rcu_read_unlock_bh();
1074
1075        if (err) {
1076                struct inet6_ifaddr *ifp;
1077                struct flowi6 fl_gw6;
1078                int redirect;
1079
1080                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1081                                      (*dst)->dev, 1);
1082
1083                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1084                if (ifp)
1085                        in6_ifa_put(ifp);
1086
1087                if (redirect) {
1088                        /*
1089                         * We need to get the dst entry for the
1090                         * default router instead
1091                         */
1092                        dst_release(*dst);
1093                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1094                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1095                        *dst = ip6_route_output(net, sk, &fl_gw6);
1096                        err = (*dst)->error;
1097                        if (err)
1098                                goto out_err_release;
1099                }
1100        }
1101#endif
1102        if (ipv6_addr_v4mapped(&fl6->saddr) &&
1103            !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1104                err = -EAFNOSUPPORT;
1105                goto out_err_release;
1106        }
1107
1108        return 0;
1109
1110out_err_release:
1111        dst_release(*dst);
1112        *dst = NULL;
1113
1114        if (err == -ENETUNREACH)
1115                IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1116        return err;
1117}
1118
1119/**
1120 *      ip6_dst_lookup - perform route lookup on flow
1121 *      @net: Network namespace to perform lookup in
1122 *      @sk: socket which provides route info
1123 *      @dst: pointer to dst_entry * for result
1124 *      @fl6: flow to lookup
1125 *
1126 *      This function performs a route lookup on the given flow.
1127 *
1128 *      It returns zero on success, or a standard errno code on error.
1129 */
1130int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1131                   struct flowi6 *fl6)
1132{
1133        *dst = NULL;
1134        return ip6_dst_lookup_tail(net, sk, dst, fl6);
1135}
1136EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1137
1138/**
1139 *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1140 *      @net: Network namespace to perform lookup in
1141 *      @sk: socket which provides route info
1142 *      @fl6: flow to lookup
1143 *      @final_dst: final destination address for ipsec lookup
1144 *
1145 *      This function performs a route lookup on the given flow.
1146 *
1147 *      It returns a valid dst pointer on success, or a pointer encoded
1148 *      error code.
1149 */
1150struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1151                                      const struct in6_addr *final_dst)
1152{
1153        struct dst_entry *dst = NULL;
1154        int err;
1155
1156        err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1157        if (err)
1158                return ERR_PTR(err);
1159        if (final_dst)
1160                fl6->daddr = *final_dst;
1161
1162        return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1163}
1164EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1165
1166/**
1167 *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1168 *      @sk: socket which provides the dst cache and route info
1169 *      @fl6: flow to lookup
1170 *      @final_dst: final destination address for ipsec lookup
1171 *      @connected: whether @sk is connected or not
1172 *
1173 *      This function performs a route lookup on the given flow with the
1174 *      possibility of using the cached route in the socket if it is valid.
1175 *      It will take the socket dst lock when operating on the dst cache.
1176 *      As a result, this function can only be used in process context.
1177 *
1178 *      In addition, for a connected socket, cache the dst in the socket
1179 *      if the current cache is not valid.
1180 *
1181 *      It returns a valid dst pointer on success, or a pointer encoded
1182 *      error code.
1183 */
1184struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1185                                         const struct in6_addr *final_dst,
1186                                         bool connected)
1187{
1188        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1189
1190        dst = ip6_sk_dst_check(sk, dst, fl6);
1191        if (dst)
1192                return dst;
1193
1194        dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1195        if (connected && !IS_ERR(dst))
1196                ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1197
1198        return dst;
1199}
1200EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1201
1202/**
1203 *      ip6_dst_lookup_tunnel - perform route lookup on tunnel
1204 *      @skb: Packet for which lookup is done
1205 *      @dev: Tunnel device
1206 *      @net: Network namespace of tunnel device
1207 *      @sock: Socket which provides route info
1208 *      @saddr: Memory to store the src ip address
1209 *      @info: Tunnel information
1210 *      @protocol: IP protocol
1211 *      @use_cache: Flag to enable cache usage
1212 *      This function performs a route lookup on a tunnel
1213 *
1214 *      It returns a valid dst pointer and stores src address to be used in
1215 *      tunnel in param saddr on success, else a pointer encoded error code.
1216 */
1217
1218struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
1219                                        struct net_device *dev,
1220                                        struct net *net,
1221                                        struct socket *sock,
1222                                        struct in6_addr *saddr,
1223                                        const struct ip_tunnel_info *info,
1224                                        u8 protocol,
1225                                        bool use_cache)
1226{
1227        struct dst_entry *dst = NULL;
1228#ifdef CONFIG_DST_CACHE
1229        struct dst_cache *dst_cache;
1230#endif
1231        struct flowi6 fl6;
1232        __u8 prio;
1233
1234#ifdef CONFIG_DST_CACHE
1235        dst_cache = (struct dst_cache *)&info->dst_cache;
1236        if (use_cache) {
1237                dst = dst_cache_get_ip6(dst_cache, saddr);
1238                if (dst)
1239                        return dst;
1240        }
1241#endif
1242        memset(&fl6, 0, sizeof(fl6));
1243        fl6.flowi6_mark = skb->mark;
1244        fl6.flowi6_proto = protocol;
1245        fl6.daddr = info->key.u.ipv6.dst;
1246        fl6.saddr = info->key.u.ipv6.src;
1247        prio = info->key.tos;
1248        fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
1249                                          info->key.label);
1250
1251        dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
1252                                              NULL);
1253        if (IS_ERR(dst)) {
1254                netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
1255                return ERR_PTR(-ENETUNREACH);
1256        }
1257        if (dst->dev == dev) { /* is this necessary? */
1258                netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
1259                dst_release(dst);
1260                return ERR_PTR(-ELOOP);
1261        }
1262#ifdef CONFIG_DST_CACHE
1263        if (use_cache)
1264                dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
1265#endif
1266        *saddr = fl6.saddr;
1267        return dst;
1268}
1269EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
1270
1271static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1272                                               gfp_t gfp)
1273{
1274        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1275}
1276
1277static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1278                                                gfp_t gfp)
1279{
1280        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1281}
1282
1283static void ip6_append_data_mtu(unsigned int *mtu,
1284                                int *maxfraglen,
1285                                unsigned int fragheaderlen,
1286                                struct sk_buff *skb,
1287                                struct rt6_info *rt,
1288                                unsigned int orig_mtu)
1289{
1290        if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1291                if (!skb) {
1292                        /* first fragment, reserve header_len */
1293                        *mtu = orig_mtu - rt->dst.header_len;
1294
1295                } else {
1296                        /*
1297                         * this fragment is not first, the headers
1298                         * space is regarded as data space.
1299                         */
1300                        *mtu = orig_mtu;
1301                }
1302                *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1303                              + fragheaderlen - sizeof(struct frag_hdr);
1304        }
1305}
1306
1307static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1308                          struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1309                          struct rt6_info *rt, struct flowi6 *fl6)
1310{
1311        struct ipv6_pinfo *np = inet6_sk(sk);
1312        unsigned int mtu;
1313        struct ipv6_txoptions *opt = ipc6->opt;
1314
1315        /*
1316         * setup for corking
1317         */
1318        if (opt) {
1319                if (WARN_ON(v6_cork->opt))
1320                        return -EINVAL;
1321
1322                v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1323                if (unlikely(!v6_cork->opt))
1324                        return -ENOBUFS;
1325
1326                v6_cork->opt->tot_len = sizeof(*opt);
1327                v6_cork->opt->opt_flen = opt->opt_flen;
1328                v6_cork->opt->opt_nflen = opt->opt_nflen;
1329
1330                v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1331                                                    sk->sk_allocation);
1332                if (opt->dst0opt && !v6_cork->opt->dst0opt)
1333                        return -ENOBUFS;
1334
1335                v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1336                                                    sk->sk_allocation);
1337                if (opt->dst1opt && !v6_cork->opt->dst1opt)
1338                        return -ENOBUFS;
1339
1340                v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1341                                                   sk->sk_allocation);
1342                if (opt->hopopt && !v6_cork->opt->hopopt)
1343                        return -ENOBUFS;
1344
1345                v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1346                                                    sk->sk_allocation);
1347                if (opt->srcrt && !v6_cork->opt->srcrt)
1348                        return -ENOBUFS;
1349
1350                /* need source address above miyazawa*/
1351        }
1352        dst_hold(&rt->dst);
1353        cork->base.dst = &rt->dst;
1354        cork->fl.u.ip6 = *fl6;
1355        v6_cork->hop_limit = ipc6->hlimit;
1356        v6_cork->tclass = ipc6->tclass;
1357        if (rt->dst.flags & DST_XFRM_TUNNEL)
1358                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1359                      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1360        else
1361                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1362                        READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1363        if (np->frag_size < mtu) {
1364                if (np->frag_size)
1365                        mtu = np->frag_size;
1366        }
1367        if (mtu < IPV6_MIN_MTU)
1368                return -EINVAL;
1369        cork->base.fragsize = mtu;
1370        cork->base.gso_size = ipc6->gso_size;
1371        cork->base.tx_flags = 0;
1372        cork->base.mark = ipc6->sockc.mark;
1373        sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1374
1375        if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1376                cork->base.flags |= IPCORK_ALLFRAG;
1377        cork->base.length = 0;
1378
1379        cork->base.transmit_time = ipc6->sockc.transmit_time;
1380
1381        return 0;
1382}
1383
1384static int __ip6_append_data(struct sock *sk,
1385                             struct flowi6 *fl6,
1386                             struct sk_buff_head *queue,
1387                             struct inet_cork *cork,
1388                             struct inet6_cork *v6_cork,
1389                             struct page_frag *pfrag,
1390                             int getfrag(void *from, char *to, int offset,
1391                                         int len, int odd, struct sk_buff *skb),
1392                             void *from, int length, int transhdrlen,
1393                             unsigned int flags, struct ipcm6_cookie *ipc6)
1394{
1395        struct sk_buff *skb, *skb_prev = NULL;
1396        unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1397        struct ubuf_info *uarg = NULL;
1398        int exthdrlen = 0;
1399        int dst_exthdrlen = 0;
1400        int hh_len;
1401        int copy;
1402        int err;
1403        int offset = 0;
1404        u32 tskey = 0;
1405        struct rt6_info *rt = (struct rt6_info *)cork->dst;
1406        struct ipv6_txoptions *opt = v6_cork->opt;
1407        int csummode = CHECKSUM_NONE;
1408        unsigned int maxnonfragsize, headersize;
1409        unsigned int wmem_alloc_delta = 0;
1410        bool paged, extra_uref = false;
1411
1412        skb = skb_peek_tail(queue);
1413        if (!skb) {
1414                exthdrlen = opt ? opt->opt_flen : 0;
1415                dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1416        }
1417
1418        paged = !!cork->gso_size;
1419        mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1420        orig_mtu = mtu;
1421
1422        if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1423            sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1424                tskey = sk->sk_tskey++;
1425
1426        hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1427
1428        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1429                        (opt ? opt->opt_nflen : 0);
1430        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1431                     sizeof(struct frag_hdr);
1432
1433        headersize = sizeof(struct ipv6hdr) +
1434                     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1435                     (dst_allfrag(&rt->dst) ?
1436                      sizeof(struct frag_hdr) : 0) +
1437                     rt->rt6i_nfheader_len;
1438
1439        /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1440         * the first fragment
1441         */
1442        if (headersize + transhdrlen > mtu)
1443                goto emsgsize;
1444
1445        if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1446            (sk->sk_protocol == IPPROTO_UDP ||
1447             sk->sk_protocol == IPPROTO_RAW)) {
1448                ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1449                                sizeof(struct ipv6hdr));
1450                goto emsgsize;
1451        }
1452
1453        if (ip6_sk_ignore_df(sk))
1454                maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1455        else
1456                maxnonfragsize = mtu;
1457
1458        if (cork->length + length > maxnonfragsize - headersize) {
1459emsgsize:
1460                pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1461                ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1462                return -EMSGSIZE;
1463        }
1464
1465        /* CHECKSUM_PARTIAL only with no extension headers and when
1466         * we are not going to fragment
1467         */
1468        if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1469            headersize == sizeof(struct ipv6hdr) &&
1470            length <= mtu - headersize &&
1471            (!(flags & MSG_MORE) || cork->gso_size) &&
1472            rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1473                csummode = CHECKSUM_PARTIAL;
1474
1475        if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1476                uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
1477                if (!uarg)
1478                        return -ENOBUFS;
1479                extra_uref = !skb_zcopy(skb);   /* only ref on new uarg */
1480                if (rt->dst.dev->features & NETIF_F_SG &&
1481                    csummode == CHECKSUM_PARTIAL) {
1482                        paged = true;
1483                } else {
1484                        uarg->zerocopy = 0;
1485                        skb_zcopy_set(skb, uarg, &extra_uref);
1486                }
1487        }
1488
1489        /*
1490         * Let's try using as much space as possible.
1491         * Use MTU if total length of the message fits into the MTU.
1492         * Otherwise, we need to reserve fragment header and
1493         * fragment alignment (= 8-15 octects, in total).
1494         *
1495         * Note that we may need to "move" the data from the tail of
1496         * of the buffer to the new fragment when we split
1497         * the message.
1498         *
1499         * FIXME: It may be fragmented into multiple chunks
1500         *        at once if non-fragmentable extension headers
1501         *        are too large.
1502         * --yoshfuji
1503         */
1504
1505        cork->length += length;
1506        if (!skb)
1507                goto alloc_new_skb;
1508
1509        while (length > 0) {
1510                /* Check if the remaining data fits into current packet. */
1511                copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1512                if (copy < length)
1513                        copy = maxfraglen - skb->len;
1514
1515                if (copy <= 0) {
1516                        char *data;
1517                        unsigned int datalen;
1518                        unsigned int fraglen;
1519                        unsigned int fraggap;
1520                        unsigned int alloclen;
1521                        unsigned int pagedlen;
1522alloc_new_skb:
1523                        /* There's no room in the current skb */
1524                        if (skb)
1525                                fraggap = skb->len - maxfraglen;
1526                        else
1527                                fraggap = 0;
1528                        /* update mtu and maxfraglen if necessary */
1529                        if (!skb || !skb_prev)
1530                                ip6_append_data_mtu(&mtu, &maxfraglen,
1531                                                    fragheaderlen, skb, rt,
1532                                                    orig_mtu);
1533
1534                        skb_prev = skb;
1535
1536                        /*
1537                         * If remaining data exceeds the mtu,
1538                         * we know we need more fragment(s).
1539                         */
1540                        datalen = length + fraggap;
1541
1542                        if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1543                                datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1544                        fraglen = datalen + fragheaderlen;
1545                        pagedlen = 0;
1546
1547                        if ((flags & MSG_MORE) &&
1548                            !(rt->dst.dev->features&NETIF_F_SG))
1549                                alloclen = mtu;
1550                        else if (!paged)
1551                                alloclen = fraglen;
1552                        else {
1553                                alloclen = min_t(int, fraglen, MAX_HEADER);
1554                                pagedlen = fraglen - alloclen;
1555                        }
1556
1557                        alloclen += dst_exthdrlen;
1558
1559                        if (datalen != length + fraggap) {
1560                                /*
1561                                 * this is not the last fragment, the trailer
1562                                 * space is regarded as data space.
1563                                 */
1564                                datalen += rt->dst.trailer_len;
1565                        }
1566
1567                        alloclen += rt->dst.trailer_len;
1568                        fraglen = datalen + fragheaderlen;
1569
1570                        /*
1571                         * We just reserve space for fragment header.
1572                         * Note: this may be overallocation if the message
1573                         * (without MSG_MORE) fits into the MTU.
1574                         */
1575                        alloclen += sizeof(struct frag_hdr);
1576
1577                        copy = datalen - transhdrlen - fraggap - pagedlen;
1578                        if (copy < 0) {
1579                                err = -EINVAL;
1580                                goto error;
1581                        }
1582                        if (transhdrlen) {
1583                                skb = sock_alloc_send_skb(sk,
1584                                                alloclen + hh_len,
1585                                                (flags & MSG_DONTWAIT), &err);
1586                        } else {
1587                                skb = NULL;
1588                                if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1589                                    2 * sk->sk_sndbuf)
1590                                        skb = alloc_skb(alloclen + hh_len,
1591                                                        sk->sk_allocation);
1592                                if (unlikely(!skb))
1593                                        err = -ENOBUFS;
1594                        }
1595                        if (!skb)
1596                                goto error;
1597                        /*
1598                         *      Fill in the control structures
1599                         */
1600                        skb->protocol = htons(ETH_P_IPV6);
1601                        skb->ip_summed = csummode;
1602                        skb->csum = 0;
1603                        /* reserve for fragmentation and ipsec header */
1604                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1605                                    dst_exthdrlen);
1606
1607                        /*
1608                         *      Find where to start putting bytes
1609                         */
1610                        data = skb_put(skb, fraglen - pagedlen);
1611                        skb_set_network_header(skb, exthdrlen);
1612                        data += fragheaderlen;
1613                        skb->transport_header = (skb->network_header +
1614                                                 fragheaderlen);
1615                        if (fraggap) {
1616                                skb->csum = skb_copy_and_csum_bits(
1617                                        skb_prev, maxfraglen,
1618                                        data + transhdrlen, fraggap, 0);
1619                                skb_prev->csum = csum_sub(skb_prev->csum,
1620                                                          skb->csum);
1621                                data += fraggap;
1622                                pskb_trim_unique(skb_prev, maxfraglen);
1623                        }
1624                        if (copy > 0 &&
1625                            getfrag(from, data + transhdrlen, offset,
1626                                    copy, fraggap, skb) < 0) {
1627                                err = -EFAULT;
1628                                kfree_skb(skb);
1629                                goto error;
1630                        }
1631
1632                        offset += copy;
1633                        length -= copy + transhdrlen;
1634                        transhdrlen = 0;
1635                        exthdrlen = 0;
1636                        dst_exthdrlen = 0;
1637
1638                        /* Only the initial fragment is time stamped */
1639                        skb_shinfo(skb)->tx_flags = cork->tx_flags;
1640                        cork->tx_flags = 0;
1641                        skb_shinfo(skb)->tskey = tskey;
1642                        tskey = 0;
1643                        skb_zcopy_set(skb, uarg, &extra_uref);
1644
1645                        if ((flags & MSG_CONFIRM) && !skb_prev)
1646                                skb_set_dst_pending_confirm(skb, 1);
1647
1648                        /*
1649                         * Put the packet on the pending queue
1650                         */
1651                        if (!skb->destructor) {
1652                                skb->destructor = sock_wfree;
1653                                skb->sk = sk;
1654                                wmem_alloc_delta += skb->truesize;
1655                        }
1656                        __skb_queue_tail(queue, skb);
1657                        continue;
1658                }
1659
1660                if (copy > length)
1661                        copy = length;
1662
1663                if (!(rt->dst.dev->features&NETIF_F_SG) &&
1664                    skb_tailroom(skb) >= copy) {
1665                        unsigned int off;
1666
1667                        off = skb->len;
1668                        if (getfrag(from, skb_put(skb, copy),
1669                                                offset, copy, off, skb) < 0) {
1670                                __skb_trim(skb, off);
1671                                err = -EFAULT;
1672                                goto error;
1673                        }
1674                } else if (!uarg || !uarg->zerocopy) {
1675                        int i = skb_shinfo(skb)->nr_frags;
1676
1677                        err = -ENOMEM;
1678                        if (!sk_page_frag_refill(sk, pfrag))
1679                                goto error;
1680
1681                        if (!skb_can_coalesce(skb, i, pfrag->page,
1682                                              pfrag->offset)) {
1683                                err = -EMSGSIZE;
1684                                if (i == MAX_SKB_FRAGS)
1685                                        goto error;
1686
1687                                __skb_fill_page_desc(skb, i, pfrag->page,
1688                                                     pfrag->offset, 0);
1689                                skb_shinfo(skb)->nr_frags = ++i;
1690                                get_page(pfrag->page);
1691                        }
1692                        copy = min_t(int, copy, pfrag->size - pfrag->offset);
1693                        if (getfrag(from,
1694                                    page_address(pfrag->page) + pfrag->offset,
1695                                    offset, copy, skb->len, skb) < 0)
1696                                goto error_efault;
1697
1698                        pfrag->offset += copy;
1699                        skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1700                        skb->len += copy;
1701                        skb->data_len += copy;
1702                        skb->truesize += copy;
1703                        wmem_alloc_delta += copy;
1704                } else {
1705                        err = skb_zerocopy_iter_dgram(skb, from, copy);
1706                        if (err < 0)
1707                                goto error;
1708                }
1709                offset += copy;
1710                length -= copy;
1711        }
1712
1713        if (wmem_alloc_delta)
1714                refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1715        return 0;
1716
1717error_efault:
1718        err = -EFAULT;
1719error:
1720        if (uarg)
1721                sock_zerocopy_put_abort(uarg, extra_uref);
1722        cork->length -= length;
1723        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1724        refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1725        return err;
1726}
1727
1728int ip6_append_data(struct sock *sk,
1729                    int getfrag(void *from, char *to, int offset, int len,
1730                                int odd, struct sk_buff *skb),
1731                    void *from, int length, int transhdrlen,
1732                    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1733                    struct rt6_info *rt, unsigned int flags)
1734{
1735        struct inet_sock *inet = inet_sk(sk);
1736        struct ipv6_pinfo *np = inet6_sk(sk);
1737        int exthdrlen;
1738        int err;
1739
1740        if (flags&MSG_PROBE)
1741                return 0;
1742        if (skb_queue_empty(&sk->sk_write_queue)) {
1743                /*
1744                 * setup for corking
1745                 */
1746                err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1747                                     ipc6, rt, fl6);
1748                if (err)
1749                        return err;
1750
1751                exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1752                length += exthdrlen;
1753                transhdrlen += exthdrlen;
1754        } else {
1755                fl6 = &inet->cork.fl.u.ip6;
1756                transhdrlen = 0;
1757        }
1758
1759        return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1760                                 &np->cork, sk_page_frag(sk), getfrag,
1761                                 from, length, transhdrlen, flags, ipc6);
1762}
1763EXPORT_SYMBOL_GPL(ip6_append_data);
1764
1765static void ip6_cork_release(struct inet_cork_full *cork,
1766                             struct inet6_cork *v6_cork)
1767{
1768        if (v6_cork->opt) {
1769                kfree(v6_cork->opt->dst0opt);
1770                kfree(v6_cork->opt->dst1opt);
1771                kfree(v6_cork->opt->hopopt);
1772                kfree(v6_cork->opt->srcrt);
1773                kfree(v6_cork->opt);
1774                v6_cork->opt = NULL;
1775        }
1776
1777        if (cork->base.dst) {
1778                dst_release(cork->base.dst);
1779                cork->base.dst = NULL;
1780                cork->base.flags &= ~IPCORK_ALLFRAG;
1781        }
1782        memset(&cork->fl, 0, sizeof(cork->fl));
1783}
1784
1785struct sk_buff *__ip6_make_skb(struct sock *sk,
1786                               struct sk_buff_head *queue,
1787                               struct inet_cork_full *cork,
1788                               struct inet6_cork *v6_cork)
1789{
1790        struct sk_buff *skb, *tmp_skb;
1791        struct sk_buff **tail_skb;
1792        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1793        struct ipv6_pinfo *np = inet6_sk(sk);
1794        struct net *net = sock_net(sk);
1795        struct ipv6hdr *hdr;
1796        struct ipv6_txoptions *opt = v6_cork->opt;
1797        struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1798        struct flowi6 *fl6 = &cork->fl.u.ip6;
1799        unsigned char proto = fl6->flowi6_proto;
1800
1801        skb = __skb_dequeue(queue);
1802        if (!skb)
1803                goto out;
1804        tail_skb = &(skb_shinfo(skb)->frag_list);
1805
1806        /* move skb->data to ip header from ext header */
1807        if (skb->data < skb_network_header(skb))
1808                __skb_pull(skb, skb_network_offset(skb));
1809        while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1810                __skb_pull(tmp_skb, skb_network_header_len(skb));
1811                *tail_skb = tmp_skb;
1812                tail_skb = &(tmp_skb->next);
1813                skb->len += tmp_skb->len;
1814                skb->data_len += tmp_skb->len;
1815                skb->truesize += tmp_skb->truesize;
1816                tmp_skb->destructor = NULL;
1817                tmp_skb->sk = NULL;
1818        }
1819
1820        /* Allow local fragmentation. */
1821        skb->ignore_df = ip6_sk_ignore_df(sk);
1822
1823        *final_dst = fl6->daddr;
1824        __skb_pull(skb, skb_network_header_len(skb));
1825        if (opt && opt->opt_flen)
1826                ipv6_push_frag_opts(skb, opt, &proto);
1827        if (opt && opt->opt_nflen)
1828                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1829
1830        skb_push(skb, sizeof(struct ipv6hdr));
1831        skb_reset_network_header(skb);
1832        hdr = ipv6_hdr(skb);
1833
1834        ip6_flow_hdr(hdr, v6_cork->tclass,
1835                     ip6_make_flowlabel(net, skb, fl6->flowlabel,
1836                                        ip6_autoflowlabel(net, np), fl6));
1837        hdr->hop_limit = v6_cork->hop_limit;
1838        hdr->nexthdr = proto;
1839        hdr->saddr = fl6->saddr;
1840        hdr->daddr = *final_dst;
1841
1842        skb->priority = sk->sk_priority;
1843        skb->mark = cork->base.mark;
1844
1845        skb->tstamp = cork->base.transmit_time;
1846
1847        skb_dst_set(skb, dst_clone(&rt->dst));
1848        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1849        if (proto == IPPROTO_ICMPV6) {
1850                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1851
1852                ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1853                ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1854        }
1855
1856        ip6_cork_release(cork, v6_cork);
1857out:
1858        return skb;
1859}
1860
1861int ip6_send_skb(struct sk_buff *skb)
1862{
1863        struct net *net = sock_net(skb->sk);
1864        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1865        int err;
1866
1867        err = ip6_local_out(net, skb->sk, skb);
1868        if (err) {
1869                if (err > 0)
1870                        err = net_xmit_errno(err);
1871                if (err)
1872                        IP6_INC_STATS(net, rt->rt6i_idev,
1873                                      IPSTATS_MIB_OUTDISCARDS);
1874        }
1875
1876        return err;
1877}
1878
/*
 *	Build the pending datagram of @sk with ip6_finish_skb() and send it.
 *	Returns 0 when nothing was pending, otherwise the result of
 *	ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1890
1891static void __ip6_flush_pending_frames(struct sock *sk,
1892                                       struct sk_buff_head *queue,
1893                                       struct inet_cork_full *cork,
1894                                       struct inet6_cork *v6_cork)
1895{
1896        struct sk_buff *skb;
1897
1898        while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1899                if (skb_dst(skb))
1900                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1901                                      IPSTATS_MIB_OUTDISCARDS);
1902                kfree_skb(skb);
1903        }
1904
1905        ip6_cork_release(cork, v6_cork);
1906}
1907
1908void ip6_flush_pending_frames(struct sock *sk)
1909{
1910        __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1911                                   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1912}
1913EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1914
1915struct sk_buff *ip6_make_skb(struct sock *sk,
1916                             int getfrag(void *from, char *to, int offset,
1917                                         int len, int odd, struct sk_buff *skb),
1918                             void *from, int length, int transhdrlen,
1919                             struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1920                             struct rt6_info *rt, unsigned int flags,
1921                             struct inet_cork_full *cork)
1922{
1923        struct inet6_cork v6_cork;
1924        struct sk_buff_head queue;
1925        int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1926        int err;
1927
1928        if (flags & MSG_PROBE)
1929                return NULL;
1930
1931        __skb_queue_head_init(&queue);
1932
1933        cork->base.flags = 0;
1934        cork->base.addr = 0;
1935        cork->base.opt = NULL;
1936        cork->base.dst = NULL;
1937        v6_cork.opt = NULL;
1938        err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1939        if (err) {
1940                ip6_cork_release(cork, &v6_cork);
1941                return ERR_PTR(err);
1942        }
1943        if (ipc6->dontfrag < 0)
1944                ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1945
1946        err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1947                                &current->task_frag, getfrag, from,
1948                                length + exthdrlen, transhdrlen + exthdrlen,
1949                                flags, ipc6);
1950        if (err) {
1951                __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1952                return ERR_PTR(err);
1953        }
1954
1955        return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1956}
1957