linux/net/ipv6/ip6_output.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      Changes:
 *      A.N.Kuznetsov   :       arithmetic in fragmentation.
 *                              extension headers are implemented.
 *                              route changes now work.
 *                              ip6_forward does not confuse sniffers.
 *                              etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *      Imran Patel     :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                      :       add ip6_append_data and related functions
 *                              for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>

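/*
 * Final neighbour-level transmit step: handle multicast loopback and
 * scope checks, honour lightweight-tunnel redirected output, then
 * resolve the nexthop neighbour and hand the skb to neigh_output().
 */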
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        const struct in6_addr *nexthop;
        struct neighbour *neigh;
        int ret;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
                    ((mroute6_is_socket(net, skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                         * is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        net, sk, newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(net, idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

                if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
                    IPV6_ADDR_SCOPE_NODELOCAL &&
                    !(dev->flags & IFF_LOOPBACK)) {
                        kfree_skb(skb);
                        return 0;
                }
        }

        if (lwtunnel_xmit_redirect(dst->lwtstate)) {
                int res = lwtunnel_xmit(skb);

                if (res < 0 || res == LWTUNNEL_XMIT_DONE)
                        return res;
        }

        rcu_read_lock_bh();
        nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
        neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
        if (unlikely(!neigh))
                neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
        if (!IS_ERR(neigh)) {
                sock_confirm_neigh(skb, neigh);
                ret = neigh_output(neigh, skb, false);
                rcu_read_unlock_bh();
                return ret;
        }
        rcu_read_unlock_bh();

        IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

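/*
 * Post-routing step: re-route skbs that picked up an XFRM policy after
 * SNAT, fragment packets that exceed the path MTU, and otherwise pass
 * straight through to ip6_finish_output2(). ip6_finish_output() wraps
 * this behind the egress cgroup-BPF hook.
 */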
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
        /* Policy lookup after SNAT yielded a new policy */
        if (skb_dst(skb)->xfrm) {
                IPCB(skb)->flags |= IPSKB_REROUTED;
                return dst_output(net, sk, skb);
        }
#endif

        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)) ||
            (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
                return ip6_fragment(net, sk, skb, ip6_finish_output2);
        else
                return ip6_finish_output2(net, sk, skb);
}

static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        int ret;

        ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
        switch (ret) {
        case NET_XMIT_SUCCESS:
                return __ip6_finish_output(net, sk, skb);
        case NET_XMIT_CN:
                return __ip6_finish_output(net, sk, skb) ? : ret;
        default:
                kfree_skb(skb);
                return ret;
        }
}

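/*
 * dst->output() entry point for IPv6. Drops the packet if IPv6 is
 * administratively disabled on the egress device, then runs the
 * NF_INET_POST_ROUTING hook (skipped for rerouted skbs) before
 * ip6_finish_output().
 */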
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                            net, sk, skb, NULL, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

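/*
 * Whether to auto-generate flow labels for this socket: use the
 * per-socket setting when one was made, otherwise fall back to the
 * per-netns default.
 */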
bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
        if (!np->autoflowlabel_set)
                return ip6_default_np_autolabel(net);
        else
                return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note: the socket lock is not held for SYNACK packets; the socket may
 * still be modified by calls to skb_set_owner_w() and ipv6_local_error(),
 * which use proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             __u32 mark, struct ipv6_txoptions *opt, int tclass)
{
        struct net *net = sock_net(sk);
        const struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        unsigned int head_room;
        struct ipv6hdr *hdr;
        u8  proto = fl6->flowi6_proto;
        int seg_len = skb->len;
        int hlimit = -1;
        u32 mtu;

        head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
        if (opt)
                head_room += opt->opt_nflen + opt->opt_flen;

        if (unlikely(skb_headroom(skb) < head_room)) {
                struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                if (!skb2) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                        kfree_skb(skb);
                        return -ENOBUFS;
                }
                if (skb->sk)
                        skb_set_owner_w(skb2, skb->sk);
                consume_skb(skb);
                skb = skb2;
        }

        if (opt) {
                seg_len += opt->opt_nflen + opt->opt_flen;

                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);

                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
                                             &fl6->saddr);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
                                ip6_autoflowlabel(net, np), fl6));

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        hdr->saddr = fl6->saddr;
        hdr->daddr = *first_hop;

        skb->protocol = htons(ETH_P_IPV6);
        skb->priority = sk->sk_priority;
        skb->mark = mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_OUT, skb->len);

                /* if egress device is enslaved to an L3 master device pass the
                 * skb to its handler for processing
                 */
                skb = l3mdev_ip6_out((struct sock *)sk, skb);
                if (unlikely(!skb))
                        return 0;

                /* hooks should never assume socket lock is held.
                 * we promote our socket to non const
                 */
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
                               net, (struct sock *)sk, skb, NULL, dst->dev,
                               dst_output);
        }

        skb->dev = dst->dev;
        /* ipv6_local_error() does not require socket lock,
         * we promote our socket to non const
         */
        ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

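/*
 * Deliver a Router Alert packet to every raw socket registered for this
 * alert value (IPV6_ROUTER_ALERT), honouring per-socket netns isolation.
 * Returns 1 if the skb was consumed by at least one socket, 0 otherwise.
 */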
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        struct ipv6_pinfo *np = inet6_sk(sk);

                        if (np && np->rtalert_isolate &&
                            !net_eq(sock_net(sk), dev_net(skb->dev))) {
                                continue;
                        }
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

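/*
 * Classify a packet whose destination we answer proxy NDP for:
 * returns 1 if it is an NDISC message that must go to local input,
 * -1 if it is destined to a link-local address (signalled as a link
 * failure and to be dropped by the caller), and 0 to forward normally.
 */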
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        __be16 frag_off;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* A unicast neighbour discovery message destined
                         * to the proxied address must be passed to the
                         * input function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
                                     struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
        if (skb->offload_l3_fwd_mark) {
                consume_skb(skb);
                return 0;
        }
#endif

        skb->tstamp = 0;
        return dst_output(net, sk, skb);
}

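/*
 * True if the packet must not be forwarded at this mtu: packets that
 * were defragmented by conntrack are judged by the largest received
 * fragment; ignore_df skbs and GSO skbs that segment below the mtu are
 * let through.
 */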
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
        if (skb->len <= mtu)
                return false;

        /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
        if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
                return true;

        if (skb->ignore_df)
                return false;

        if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
                return false;

        return true;
}

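/*
 * Forwarding path proper: sanity checks (forwarding enabled, hop limit,
 * LRO, xfrm policy), Router Alert delivery, proxy-NDP handling, ICMPv6
 * redirects when packets leave via the interface they arrived on, and
 * the Packet Too Big check before the NF_INET_FORWARD hook.
 */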
int ip6_forward(struct sk_buff *skb)
{
        struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        if (unlikely(skb->sk))
                goto drop;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        skb_forward_csum(skb);

        /*
         *      We do NOT do any processing on RA packets; we push them
         *      to user level AS IS, without any warranty that the
         *      application will be able to interpret them. The reason
         *      is that we cannot make anything clever here.
         *
         *      We are not an end node, so if the packet contains
         *      AH/ESP we cannot do anything with it either.
         *      Defragmentation would also be a mistake: RA packets
         *      cannot be fragmented, because there is no warranty
         *      that different fragments will go along one path. --ANK
         */
        if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
                if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
                        return 0;
        }

        /*
         *      check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                /* Force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* The IPv6 specs say nothing about it, but it is clear that we
         * cannot send redirects to source routed frames.
         * We don't send redirects to frames decapsulated from IPsec.
         */
        if (IP6CB(skb)->iif == dst->dev->ifindex &&
            opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct inet_peer *peer;
                struct rt6_info *rt;

                /*
                 *      incoming and outgoing devices are the same;
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if (rt->rt6i_flags & RTF_GATEWAY)
                        target = &rt->rt6i_gateway;
                else
                        target = &hdr->daddr;

                peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

                /* Limit redirects both by destination (here)
                 * and by source (inside ndisc_send_redirect).
                 */
                if (inet_peer_xrlim_allow(peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
                if (peer)
                        inet_putpeer(peer);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = ip6_dst_mtu_forward(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (ip6_pkt_too_big(skb, mtu)) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Mangling hops number delayed to point after skb COW */

        hdr->hop_limit--;

        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
                       net, NULL, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

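/*
 * Propagate per-packet metadata (dst, device, mark, hash, conntrack,
 * extensions, secmark, ...) from the original skb to a fragment.
 */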
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

        skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
        skb_ext_copy(to, from);
        skb_copy_secmark(to, from);
}

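/*
 * Fast-path fragmentation over an existing frag_list: save a copy of
 * the network headers, detach the frag list, and turn @skb into the
 * first fragment by inserting a fragment header with IP6_MF set. The
 * iterator is then advanced with ip6_fraglist_prepare() and
 * ip6_fraglist_next(); see ip6_fragment() below for the canonical
 * usage loop.
 */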
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
                      u8 nexthdr, __be32 frag_id,
                      struct ip6_fraglist_iter *iter)
{
        unsigned int first_len;
        struct frag_hdr *fh;

        /* BUILD HEADER */
        *prevhdr = NEXTHDR_FRAGMENT;
        iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
        if (!iter->tmp_hdr)
                return -ENOMEM;

        iter->frag = skb_shinfo(skb)->frag_list;
        skb_frag_list_init(skb);

        iter->offset = 0;
        iter->hlen = hlen;
        iter->frag_id = frag_id;
        iter->nexthdr = nexthdr;

        __skb_pull(skb, hlen);
        fh = __skb_push(skb, sizeof(struct frag_hdr));
        __skb_push(skb, hlen);
        skb_reset_network_header(skb);
        memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

        fh->nexthdr = nexthdr;
        fh->reserved = 0;
        fh->frag_off = htons(IP6_MF);
        fh->identification = frag_id;

        first_len = skb_pagelen(skb);
        skb->data_len = first_len - skb_headlen(skb);
        skb->len = first_len;
        ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

        return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

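/*
 * Turn the next skb on the frag list into a standalone fragment:
 * prepend the saved network headers plus a fragment header, compute
 * its offset from the running total, and copy the metadata over.
 */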
void ip6_fraglist_prepare(struct sk_buff *skb,
                          struct ip6_fraglist_iter *iter)
{
        struct sk_buff *frag = iter->frag;
        unsigned int hlen = iter->hlen;
        struct frag_hdr *fh;

        frag->ip_summed = CHECKSUM_NONE;
        skb_reset_transport_header(frag);
        fh = __skb_push(frag, sizeof(struct frag_hdr));
        __skb_push(frag, hlen);
        skb_reset_network_header(frag);
        memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
        iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
        fh->nexthdr = iter->nexthdr;
        fh->reserved = 0;
        fh->frag_off = htons(iter->offset);
        if (frag->next)
                fh->frag_off |= htons(IP6_MF);
        fh->identification = iter->frag_id;
        ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
        ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

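/*
 * Seed the slow-path fragmentation state: how much payload is left,
 * where it starts, and the head/tail room each new fragment needs.
 */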
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
                   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
                   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
        state->prevhdr = prevhdr;
        state->nexthdr = nexthdr;
        state->frag_id = frag_id;

        state->hlen = hlen;
        state->mtu = mtu;

        state->left = skb->len - hlen;  /* Space per frame */
        state->ptr = hlen;              /* Where to start from */

        state->hroom = hdr_room;
        state->troom = needed_tailroom;

        state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

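/*
 * Slow-path fragmentation step: allocate a fresh skb for the next
 * fragment, copy the (modified) network headers and up to mtu bytes of
 * payload into it, and fill in the fragment header. Returns the new
 * fragment or an ERR_PTR() on allocation failure.
 */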
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
        u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
        struct sk_buff *frag;
        struct frag_hdr *fh;
        unsigned int len;

        len = state->left;
        /* IF: it doesn't fit, use 'mtu' - the data space left */
        if (len > state->mtu)
                len = state->mtu;
        /* IF: we are not sending up to and including the packet end,
         * then align the next start on an eight byte boundary.
         */
        if (len < state->left)
                len &= ~7;

        /* Allocate buffer */
        frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
                         state->hroom + state->troom, GFP_ATOMIC);
        if (!frag)
                return ERR_PTR(-ENOMEM);

        /*
         *      Set up data on packet
         */

        ip6_copy_metadata(frag, skb);
        skb_reserve(frag, state->hroom);
        skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
        skb_reset_network_header(frag);
        fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
        frag->transport_header = (frag->network_header + state->hlen +
                                  sizeof(struct frag_hdr));

        /*
         *      Charge the memory for the fragment to any owner
         *      it might possess
         */
        if (skb->sk)
                skb_set_owner_w(frag, skb->sk);

        /*
         *      Copy the packet header into the new buffer.
         */
        skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

        fragnexthdr_offset = skb_network_header(frag);
        fragnexthdr_offset += prevhdr - skb_network_header(skb);
        *fragnexthdr_offset = NEXTHDR_FRAGMENT;

        /*
         *      Build fragment header.
         */
        fh->nexthdr = state->nexthdr;
        fh->reserved = 0;
        fh->identification = state->frag_id;

        /*
         *      Copy a block of the IP datagram.
         */
        BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
                             len));
        state->left -= len;

        fh->frag_off = htons(state->offset);
        if (state->left > 0)
                fh->frag_off |= htons(IP6_MF);
        ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

        state->ptr += len;
        state->offset += len;

        return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

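/*
 * Fragment @skb to the path MTU and feed each fragment to @output.
 * Uses the fraglist fast path when the skb geometry allows it, and the
 * ip6_frag_init()/ip6_frag_next() slow path otherwise; packets that may
 * not be fragmented trigger an ICMPV6_PKT_TOOBIG instead.
 */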
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
                                inet6_sk(skb->sk) : NULL;
        struct ip6_frag_state state;
        unsigned int mtu, hlen, nexthdr_offset;
        int hroom, err = 0;
        __be32 frag_id;
        u8 *prevhdr, nexthdr = 0;

        err = ip6_find_1stfragopt(skb, &prevhdr);
        if (err < 0)
                goto fail;
        hlen = err;
        nexthdr = *prevhdr;
        nexthdr_offset = prevhdr - skb_network_header(skb);

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb is not generated by a local socket.
         */
        if (unlikely(!skb->ignore_df && skb->len > mtu))
                goto fail_toobig;

        if (IP6CB(skb)->frag_max_size) {
                if (IP6CB(skb)->frag_max_size > mtu)
                        goto fail_toobig;

                /* don't send fragments larger than what we received */
                mtu = IP6CB(skb)->frag_max_size;
                if (mtu < IPV6_MIN_MTU)
                        mtu = IPV6_MIN_MTU;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < hlen + sizeof(struct frag_hdr) + 8)
                goto fail_toobig;
        mtu -= hlen + sizeof(struct frag_hdr);

        frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
                                    &ipv6_hdr(skb)->saddr);

        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_checksum_help(skb)))
                goto fail;

        prevhdr = skb_network_header(skb) + nexthdr_offset;
        hroom = LL_RESERVED_SPACE(rt->dst.dev);
        if (skb_has_frag_list(skb)) {
                unsigned int first_len = skb_pagelen(skb);
                struct ip6_fraglist_iter iter;
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb) ||
                    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
                                        &iter);
                if (err < 0)
                        goto fail;

                for (;;) {
                        /* Prepare header of the next frame
                         * before the previous one went down.
                         */
                        if (iter.frag)
                                ip6_fraglist_prepare(skb, &iter);

                        err = output(net, sk, skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !iter.frag)
                                break;

                        skb = ip6_fraglist_next(&iter);
                }

                kfree(iter.tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        return 0;
                }

                kfree_skb_list(iter.frag);

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        /*
         *      Fragment the datagram.
         */

        ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
                      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
                      &state);

        /*
         *      Keep copying data until we run out.
         */

        while (state.left > 0) {
                frag = ip6_frag_next(skb, &state);
                if (IS_ERR(frag)) {
                        err = PTR_ERR(frag);
                        goto fail;
                }

                /*
                 *      Put this fragment into the sending queue.
                 */
                err = output(net, sk, frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        consume_skb(skb);
        return err;

fail_toobig:
        if (skb->sk && dst_allfrag(skb_dst(skb)))
                sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        err = -EMSGSIZE;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

static inline int ip6_rt_check(const struct rt6key *rt_key,
                               const struct in6_addr *fl_addr,
                               const struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

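/*
 * Validate a dst cached in the socket against the flow about to be
 * sent: the cached route must be IPv6 and must still match the flow's
 * destination (and source, with subtrees) and outgoing interface.
 * Returns the dst if still usable; otherwise releases it and returns
 * NULL so the caller performs a fresh route lookup.
 */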
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          const struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt;

        if (!dst)
                goto out;

        if (dst->ops->family != AF_INET6) {
                dst_release(dst);
                return NULL;
        }

        rt = (struct rt6_info *)dst;
        /* Yes, checking route validity in the unconnected
         * case is not very simple. Take into account
         * that we do not support routing by source, TOS,
         * and MSG_DONTROUTE            --ANK (980726)
         *
         * 1. ip6_rt_check(): If route was host route,
         *    check that cached destination is current.
         *    If it is network route, we still may
         *    check its validity using saved pointer
         *    to the last used address: daddr_cache.
         *    We do not want to save whole address now,
         *    (because main consumer of this service
         *    is tcp, which does not have this problem),
         *    so that the last trick works only on connected
         *    sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
           (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
              (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

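/*
 * Core of the ip6_dst_lookup*() helpers: resolve a route for @fl6,
 * selecting a source address first when the flow has none, and (with
 * optimistic DAD) falling back to the default router's dst while our
 * source address is still optimistic. Rejects v4-mapped source
 * addresses paired with non-v4-mapped destinations.
 */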
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        struct neighbour *n;
        struct rt6_info *rt;
#endif
        int err;
        int flags = 0;

        /* The correct way to handle this would be to do
         * ip6_route_get_saddr, and then ip6_route_output; however,
         * the route-specific preferred source forces the
         * ip6_route_output call _before_ ip6_route_get_saddr.
         *
         * In source specific routing (no src=any default route),
         * ip6_route_output will fail given src=any saddr, though, so
         * that's why we try it again later.
         */
        if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
                struct fib6_info *from;
                struct rt6_info *rt;
                bool had_dst = *dst != NULL;

                if (!had_dst)
                        *dst = ip6_route_output(net, sk, fl6);
                rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

                rcu_read_lock();
                from = rt ? rcu_dereference(rt->from) : NULL;
                err = ip6_route_get_saddr(net, from, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                rcu_read_unlock();

                if (err)
                        goto out_err_release;

                /* If we had an erroneous initial result, pretend it
                 * never existed and let the SA-enabled version take
                 * over.
                 */
                if (!had_dst && (*dst)->error) {
                        dst_release(*dst);
                        *dst = NULL;
                }

                if (fl6->flowi6_oif)
                        flags |= RT6_LOOKUP_F_IFACE;
        }

        if (!*dst)
                *dst = ip6_route_output_flags(net, sk, fl6, flags);

        err = (*dst)->error;
        if (err)
                goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * Here if the dst entry we've looked up
         * has a neighbour entry that is in the INCOMPLETE
         * state and the src address from the flow is
         * marked as OPTIMISTIC, we release the found
         * dst entry and replace it instead with the
         * dst entry of the nexthop router
         */
        rt = (struct rt6_info *) *dst;
        rcu_read_lock_bh();
        n = __ipv6_neigh_lookup_noref(rt->dst.dev,
                                      rt6_nexthop(rt, &fl6->daddr));
        err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
        rcu_read_unlock_bh();

        if (err) {
                struct inet6_ifaddr *ifp;
                struct flowi6 fl_gw6;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw6);
                        err = (*dst)->error;
                        if (err)
                                goto out_err_release;
                }
        }
#endif
        if (ipv6_addr_v4mapped(&fl6->saddr) &&
            !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
                err = -EAFNOSUPPORT;
                goto out_err_release;
        }

        return 0;

out_err_release:
        dst_release(*dst);
        *dst = NULL;

        if (err == -ENETUNREACH)
                IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        return err;
}

/**
 *      ip6_dst_lookup - perform route lookup on flow
 *      @net: network namespace to perform the lookup in
 *      @sk: socket which provides route info
 *      @dst: pointer to dst_entry * for result
 *      @fl6: flow to lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
                   struct flowi6 *fl6)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *      @sk: socket which provides route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst)
{
        struct dst_entry *dst = NULL;
        int err;

        err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;

        return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *      @sk: socket which provides the dst cache and route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @connected: whether @sk is connected or not
 *
 *      This function performs a route lookup on the given flow with the
 *      possibility of using the cached route in the socket if it is valid.
 *      It will take the socket dst lock when operating on the dst cache.
 *      As a result, this function can only be used in process context.
 *
 *      In addition, for a connected socket, cache the dst in the socket
 *      if the current cache is not valid.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                         const struct in6_addr *final_dst,
                                         bool connected)
{
        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

        dst = ip6_sk_dst_check(sk, dst, fl6);
        if (dst)
                return dst;

        dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
        if (connected && !IS_ERR(dst))
                ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

        return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

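/*
 * Recompute the effective mtu/maxfraglen for __ip6_append_data() once
 * the first fragment exists: the first fragment reserves
 * rt->dst.header_len, later ones treat that headroom as data space.
 * Skipped for DST_XFRM_TUNNEL routes, where ip6_setup_cork() picks the
 * mtu differently.
 */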
static void ip6_append_data_mtu(unsigned int *mtu,
                                int *maxfraglen,
                                unsigned int fragheaderlen,
                                struct sk_buff *skb,
                                struct rt6_info *rt,
                                unsigned int orig_mtu)
{
        if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
                if (!skb) {
                        /* first fragment, reserve header_len */
                        *mtu = orig_mtu - rt->dst.header_len;

                } else {
                        /*
                         * this fragment is not first, the headers
                         * space is regarded as data space.
                         */
                        *mtu = orig_mtu;
                }
                *maxfraglen = ((*mtu - fragheaderlen) & ~7)
                              + fragheaderlen - sizeof(struct frag_hdr);
        }
}

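/*
 * Set up the per-socket cork for ip6_append_data(): duplicate the tx
 * options so they outlive the caller, pin the route, and compute the
 * fragment size from the path MTU (or the device MTU when PMTU
 * discovery is in probe mode), rejecting anything below IPV6_MIN_MTU.
 */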
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
                          struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
                          struct rt6_info *rt, struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        unsigned int mtu;
        struct ipv6_txoptions *opt = ipc6->opt;

        /*
         * setup for corking
         */
        if (opt) {
                if (WARN_ON(v6_cork->opt))
                        return -EINVAL;

                v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
                if (unlikely(!v6_cork->opt))
                        return -ENOBUFS;

                v6_cork->opt->tot_len = sizeof(*opt);
                v6_cork->opt->opt_flen = opt->opt_flen;
                v6_cork->opt->opt_nflen = opt->opt_nflen;

                v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                    sk->sk_allocation);
                if (opt->dst0opt && !v6_cork->opt->dst0opt)
                        return -ENOBUFS;

                v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                    sk->sk_allocation);
                if (opt->dst1opt && !v6_cork->opt->dst1opt)
                        return -ENOBUFS;

                v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                   sk->sk_allocation);
                if (opt->hopopt && !v6_cork->opt->hopopt)
                        return -ENOBUFS;

                v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                    sk->sk_allocation);
                if (opt->srcrt && !v6_cork->opt->srcrt)
                        return -ENOBUFS;

                /* need source address above. --miyazawa */
        }
        dst_hold(&rt->dst);
        cork->base.dst = &rt->dst;
        cork->fl.u.ip6 = *fl6;
        v6_cork->hop_limit = ipc6->hlimit;
        v6_cork->tclass = ipc6->tclass;
        if (rt->dst.flags & DST_XFRM_TUNNEL)
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
        else
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                        READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
        if (np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < IPV6_MIN_MTU)
                return -EINVAL;
        cork->base.fragsize = mtu;
        cork->base.gso_size = ipc6->gso_size;
        cork->base.tx_flags = 0;
        sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

        if (dst_allfrag(xfrm_dst_path(&rt->dst)))
                cork->base.flags |= IPCORK_ALLFRAG;
        cork->base.length = 0;

        cork->base.transmit_time = ipc6->sockc.transmit_time;

        return 0;
}

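/*
 * Workhorse behind ip6_append_data()/ip6_make_skb(): append @length
 * bytes from @getfrag to the queue, growing the tail skb or allocating
 * new ones sized to the corked MTU, and handling zerocopy, GSO paging
 * and the RFC 7112 requirement that the whole header chain fit in the
 * first fragment.
 */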
1308static int __ip6_append_data(struct sock *sk,
1309                             struct flowi6 *fl6,
1310                             struct sk_buff_head *queue,
1311                             struct inet_cork *cork,
1312                             struct inet6_cork *v6_cork,
1313                             struct page_frag *pfrag,
1314                             int getfrag(void *from, char *to, int offset,
1315                                         int len, int odd, struct sk_buff *skb),
1316                             void *from, int length, int transhdrlen,
1317                             unsigned int flags, struct ipcm6_cookie *ipc6)
1318{
1319        struct sk_buff *skb, *skb_prev = NULL;
1320        unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1321        struct ubuf_info *uarg = NULL;
1322        int exthdrlen = 0;
1323        int dst_exthdrlen = 0;
1324        int hh_len;
1325        int copy;
1326        int err;
1327        int offset = 0;
1328        u32 tskey = 0;
1329        struct rt6_info *rt = (struct rt6_info *)cork->dst;
1330        struct ipv6_txoptions *opt = v6_cork->opt;
1331        int csummode = CHECKSUM_NONE;
1332        unsigned int maxnonfragsize, headersize;
1333        unsigned int wmem_alloc_delta = 0;
1334        bool paged, extra_uref = false;
1335
1336        skb = skb_peek_tail(queue);
1337        if (!skb) {
1338                exthdrlen = opt ? opt->opt_flen : 0;
1339                dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1340        }
1341
1342        paged = !!cork->gso_size;
1343        mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1344        orig_mtu = mtu;
1345
1346        if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1347            sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1348                tskey = sk->sk_tskey++;
1349
1350        hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1351
1352        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1353                        (opt ? opt->opt_nflen : 0);
1354        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1355                     sizeof(struct frag_hdr);
1356
1357        headersize = sizeof(struct ipv6hdr) +
1358                     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1359                     (dst_allfrag(&rt->dst) ?
1360                      sizeof(struct frag_hdr) : 0) +
1361                     rt->rt6i_nfheader_len;
1362
1363        /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1364         * the first fragment
1365         */
1366        if (headersize + transhdrlen > mtu)
1367                goto emsgsize;
1368
1369        if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1370            (sk->sk_protocol == IPPROTO_UDP ||
1371             sk->sk_protocol == IPPROTO_RAW)) {
1372                ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1373                                sizeof(struct ipv6hdr));
1374                goto emsgsize;
1375        }
1376
1377        if (ip6_sk_ignore_df(sk))
1378                maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1379        else
1380                maxnonfragsize = mtu;
1381
1382        if (cork->length + length > maxnonfragsize - headersize) {
1383emsgsize:
1384                pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1385                ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1386                return -EMSGSIZE;
1387        }
1388
1389        /* CHECKSUM_PARTIAL only with no extension headers and when
1390         * we are not going to fragment
1391         */
1392        if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1393            headersize == sizeof(struct ipv6hdr) &&
1394            length <= mtu - headersize &&
1395            (!(flags & MSG_MORE) || cork->gso_size) &&
1396            rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1397                csummode = CHECKSUM_PARTIAL;
1398
1399        if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1400                uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
1401                if (!uarg)
1402                        return -ENOBUFS;
1403                extra_uref = !skb_zcopy(skb);   /* only ref on new uarg */
1404                if (rt->dst.dev->features & NETIF_F_SG &&
1405                    csummode == CHECKSUM_PARTIAL) {
1406                        paged = true;
1407                } else {
1408                        uarg->zerocopy = 0;
1409                        skb_zcopy_set(skb, uarg, &extra_uref);
1410                }
1411        }
1412
1413        /*
1414         * Let's try using as much space as possible.
1415         * Use MTU if total length of the message fits into the MTU.
1416         * Otherwise, we need to reserve fragment header and
1417         * fragment alignment (= 8-15 octects, in total).
1418         *
1419         * Note that we may need to "move" the data from the tail of
1420         * of the buffer to the new fragment when we split
1421         * the message.
1422         *
1423         * FIXME: It may be fragmented into multiple chunks
1424         *        at once if non-fragmentable extension headers
1425         *        are too large.
1426         * --yoshfuji
1427         */
1428
1429        cork->length += length;
1430        if (!skb)
1431                goto alloc_new_skb;
1432
1433        while (length > 0) {
1434                /* Check if the remaining data fits into current packet. */
1435                copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1436                if (copy < length)
1437                        copy = maxfraglen - skb->len;
1438
1439                if (copy <= 0) {
1440                        char *data;
1441                        unsigned int datalen;
1442                        unsigned int fraglen;
1443                        unsigned int fraggap;
1444                        unsigned int alloclen;
1445                        unsigned int pagedlen;
1446alloc_new_skb:
1447                        /* There's no room in the current skb */
1448                        if (skb)
1449                                fraggap = skb->len - maxfraglen;
1450                        else
1451                                fraggap = 0;
1452                        /* update mtu and maxfraglen if necessary */
1453                        if (!skb || !skb_prev)
1454                                ip6_append_data_mtu(&mtu, &maxfraglen,
1455                                                    fragheaderlen, skb, rt,
1456                                                    orig_mtu);
1457
1458                        skb_prev = skb;
1459
1460                        /*
1461                         * If remaining data exceeds the mtu,
1462                         * we know we need more fragment(s).
1463                         */
1464                        datalen = length + fraggap;
1465
1466                        if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1467                                datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1468                        fraglen = datalen + fragheaderlen;
1469                        pagedlen = 0;
1470
1471                        if ((flags & MSG_MORE) &&
1472                            !(rt->dst.dev->features&NETIF_F_SG))
1473                                alloclen = mtu;
1474                        else if (!paged)
1475                                alloclen = fraglen;
1476                        else {
1477                                alloclen = min_t(int, fraglen, MAX_HEADER);
1478                                pagedlen = fraglen - alloclen;
1479                        }
1480
1481                        alloclen += dst_exthdrlen;
1482
1483                        if (datalen != length + fraggap) {
1484                                /*
1485                                 * This is not the last fragment; the trailer
1486                                 * space is regarded as data space.
1487                                 */
1488                                datalen += rt->dst.trailer_len;
1489                        }
1490
1491                        alloclen += rt->dst.trailer_len;
1492                        fraglen = datalen + fragheaderlen;
1493
1494                        /*
1495                         * We just reserve space for the fragment header.
1496                         * Note: this may be an overallocation if the message
1497                         * (without MSG_MORE) fits into the MTU.
1498                         */
1499                        alloclen += sizeof(struct frag_hdr);
1500
1501                        copy = datalen - transhdrlen - fraggap - pagedlen;
1502                        if (copy < 0) {
1503                                err = -EINVAL;
1504                                goto error;
1505                        }
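                        /* transhdrlen is non-zero only for the first skb of
                         * the message: that allocation may block and is
                         * charged to the socket right away, while later
                         * fragments use plain alloc_skb() and are accounted
                         * in one batch via wmem_alloc_delta on return.
                         */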
1506                        if (transhdrlen) {
1507                                skb = sock_alloc_send_skb(sk,
1508                                                alloclen + hh_len,
1509                                                (flags & MSG_DONTWAIT), &err);
1510                        } else {
1511                                skb = NULL;
1512                                if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1513                                    2 * sk->sk_sndbuf)
1514                                        skb = alloc_skb(alloclen + hh_len,
1515                                                        sk->sk_allocation);
1516                                if (unlikely(!skb))
1517                                        err = -ENOBUFS;
1518                        }
1519                        if (!skb)
1520                                goto error;
1521                        /*
1522                         *      Fill in the control structures
1523                         */
1524                        skb->protocol = htons(ETH_P_IPV6);
1525                        skb->ip_summed = csummode;
1526                        skb->csum = 0;
1527                        /* reserve room for the fragment and IPsec headers */
1528                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1529                                    dst_exthdrlen);
1530
1531                        /*
1532                         *      Find where to start putting bytes
1533                         */
1534                        data = skb_put(skb, fraglen - pagedlen);
1535                        skb_set_network_header(skb, exthdrlen);
1536                        data += fragheaderlen;
1537                        skb->transport_header = (skb->network_header +
1538                                                 fragheaderlen);
1539                        if (fraggap) {
1540                                skb->csum = skb_copy_and_csum_bits(
1541                                        skb_prev, maxfraglen,
1542                                        data + transhdrlen, fraggap, 0);
1543                                skb_prev->csum = csum_sub(skb_prev->csum,
1544                                                          skb->csum);
1545                                data += fraggap;
1546                                pskb_trim_unique(skb_prev, maxfraglen);
1547                        }
1548                        if (copy > 0 &&
1549                            getfrag(from, data + transhdrlen, offset,
1550                                    copy, fraggap, skb) < 0) {
1551                                err = -EFAULT;
1552                                kfree_skb(skb);
1553                                goto error;
1554                        }
1555
1556                        offset += copy;
1557                        length -= copy + transhdrlen;
1558                        transhdrlen = 0;
1559                        exthdrlen = 0;
1560                        dst_exthdrlen = 0;
1561
1562                        /* Only the initial fragment is timestamped */
1563                        skb_shinfo(skb)->tx_flags = cork->tx_flags;
1564                        cork->tx_flags = 0;
1565                        skb_shinfo(skb)->tskey = tskey;
1566                        tskey = 0;
1567                        skb_zcopy_set(skb, uarg, &extra_uref);
1568
1569                        if ((flags & MSG_CONFIRM) && !skb_prev)
1570                                skb_set_dst_pending_confirm(skb, 1);
1571
1572                        /*
1573                         * Put the packet on the pending queue
1574                         */
1575                        if (!skb->destructor) {
1576                                skb->destructor = sock_wfree;
1577                                skb->sk = sk;
1578                                wmem_alloc_delta += skb->truesize;
1579                        }
1580                        __skb_queue_tail(queue, skb);
1581                        continue;
1582                }
1583
1584                if (copy > length)
1585                        copy = length;
1586
1587                if (!(rt->dst.dev->features&NETIF_F_SG) &&
1588                    skb_tailroom(skb) >= copy) {
1589                        unsigned int off;
1590
1591                        off = skb->len;
1592                        if (getfrag(from, skb_put(skb, copy),
1593                                                offset, copy, off, skb) < 0) {
1594                                __skb_trim(skb, off);
1595                                err = -EFAULT;
1596                                goto error;
1597                        }
1598                } else if (!uarg || !uarg->zerocopy) {
1599                        int i = skb_shinfo(skb)->nr_frags;
1600
1601                        err = -ENOMEM;
1602                        if (!sk_page_frag_refill(sk, pfrag))
1603                                goto error;
1604
1605                        if (!skb_can_coalesce(skb, i, pfrag->page,
1606                                              pfrag->offset)) {
1607                                err = -EMSGSIZE;
1608                                if (i == MAX_SKB_FRAGS)
1609                                        goto error;
1610
1611                                __skb_fill_page_desc(skb, i, pfrag->page,
1612                                                     pfrag->offset, 0);
1613                                skb_shinfo(skb)->nr_frags = ++i;
1614                                get_page(pfrag->page);
1615                        }
1616                        copy = min_t(int, copy, pfrag->size - pfrag->offset);
1617                        if (getfrag(from,
1618                                    page_address(pfrag->page) + pfrag->offset,
1619                                    offset, copy, skb->len, skb) < 0)
1620                                goto error_efault;
1621
1622                        pfrag->offset += copy;
1623                        skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1624                        skb->len += copy;
1625                        skb->data_len += copy;
1626                        skb->truesize += copy;
1627                        wmem_alloc_delta += copy;
1628                } else {
1629                        err = skb_zerocopy_iter_dgram(skb, from, copy);
1630                        if (err < 0)
1631                                goto error;
1632                }
1633                offset += copy;
1634                length -= copy;
1635        }
1636
1637        if (wmem_alloc_delta)
1638                refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1639        return 0;
1640
1641error_efault:
1642        err = -EFAULT;
1643error:
1644        if (uarg)
1645                sock_zerocopy_put_abort(uarg, extra_uref);
1646        cork->length -= length;
1647        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1648        refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1649        return err;
1650}
1651
1652int ip6_append_data(struct sock *sk,
1653                    int getfrag(void *from, char *to, int offset, int len,
1654                                int odd, struct sk_buff *skb),
1655                    void *from, int length, int transhdrlen,
1656                    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1657                    struct rt6_info *rt, unsigned int flags)
1658{
1659        struct inet_sock *inet = inet_sk(sk);
1660        struct ipv6_pinfo *np = inet6_sk(sk);
1661        int exthdrlen;
1662        int err;
1663
1664        if (flags&MSG_PROBE)
1665                return 0;
1666        if (skb_queue_empty(&sk->sk_write_queue)) {
1667                /*
1668                 * setup for corking
1669                 */
1670                err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1671                                     ipc6, rt, fl6);
1672                if (err)
1673                        return err;
1674
1675                exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1676                length += exthdrlen;
1677                transhdrlen += exthdrlen;
1678        } else {
1679                fl6 = &inet->cork.fl.u.ip6;
1680                transhdrlen = 0;
1681        }
1682
1683        return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1684                                 &np->cork, sk_page_frag(sk), getfrag,
1685                                 from, length, transhdrlen, flags, ipc6);
1686}
1687EXPORT_SYMBOL_GPL(ip6_append_data);
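
/*
 * Usage sketch (illustrative, loosely modelled on the UDPv6 send path;
 * flow setup, routing and most error handling are elided, and dst, ipc6
 * and fl6 are assumed to have been prepared by the caller):
 *
 *	err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen,
 *			      sizeof(struct udphdr), &ipc6, &fl6,
 *			      (struct rt6_info *)dst, msg->msg_flags);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!(msg->msg_flags & MSG_MORE))
 *		err = ip6_push_pending_frames(sk);
 */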
1688
1689static void ip6_cork_release(struct inet_cork_full *cork,
1690                             struct inet6_cork *v6_cork)
1691{
1692        if (v6_cork->opt) {
1693                kfree(v6_cork->opt->dst0opt);
1694                kfree(v6_cork->opt->dst1opt);
1695                kfree(v6_cork->opt->hopopt);
1696                kfree(v6_cork->opt->srcrt);
1697                kfree(v6_cork->opt);
1698                v6_cork->opt = NULL;
1699        }
1700
1701        if (cork->base.dst) {
1702                dst_release(cork->base.dst);
1703                cork->base.dst = NULL;
1704                cork->base.flags &= ~IPCORK_ALLFRAG;
1705        }
1706        memset(&cork->fl, 0, sizeof(cork->fl));
1707}
1708
1709struct sk_buff *__ip6_make_skb(struct sock *sk,
1710                               struct sk_buff_head *queue,
1711                               struct inet_cork_full *cork,
1712                               struct inet6_cork *v6_cork)
1713{
1714        struct sk_buff *skb, *tmp_skb;
1715        struct sk_buff **tail_skb;
1716        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1717        struct ipv6_pinfo *np = inet6_sk(sk);
1718        struct net *net = sock_net(sk);
1719        struct ipv6hdr *hdr;
1720        struct ipv6_txoptions *opt = v6_cork->opt;
1721        struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1722        struct flowi6 *fl6 = &cork->fl.u.ip6;
1723        unsigned char proto = fl6->flowi6_proto;
1724
1725        skb = __skb_dequeue(queue);
1726        if (!skb)
1727                goto out;
1728        tail_skb = &(skb_shinfo(skb)->frag_list);
1729
1730        /* move skb->data up to the IP header, past the ext header room */
1731        if (skb->data < skb_network_header(skb))
1732                __skb_pull(skb, skb_network_offset(skb));
1733        while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1734                __skb_pull(tmp_skb, skb_network_header_len(skb));
1735                *tail_skb = tmp_skb;
1736                tail_skb = &(tmp_skb->next);
1737                skb->len += tmp_skb->len;
1738                skb->data_len += tmp_skb->len;
1739                skb->truesize += tmp_skb->truesize;
1740                tmp_skb->destructor = NULL;
1741                tmp_skb->sk = NULL;
1742        }
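
        /* At this point the message is one skb: the first fragment is the
         * head and each later fragment hangs off its frag_list, where
         * ip6_fragment() can pick them up again if the result exceeds the
         * path MTU.
         */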
1743
1744        /* Allow local fragmentation. */
1745        skb->ignore_df = ip6_sk_ignore_df(sk);
1746
1747        *final_dst = fl6->daddr;
1748        __skb_pull(skb, skb_network_header_len(skb));
1749        if (opt && opt->opt_flen)
1750                ipv6_push_frag_opts(skb, opt, &proto);
1751        if (opt && opt->opt_nflen)
1752                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1753
1754        skb_push(skb, sizeof(struct ipv6hdr));
1755        skb_reset_network_header(skb);
1756        hdr = ipv6_hdr(skb);
1757
1758        ip6_flow_hdr(hdr, v6_cork->tclass,
1759                     ip6_make_flowlabel(net, skb, fl6->flowlabel,
1760                                        ip6_autoflowlabel(net, np), fl6));
1761        hdr->hop_limit = v6_cork->hop_limit;
1762        hdr->nexthdr = proto;
1763        hdr->saddr = fl6->saddr;
1764        hdr->daddr = *final_dst;
1765
1766        skb->priority = sk->sk_priority;
1767        skb->mark = sk->sk_mark;
1768
1769        skb->tstamp = cork->base.transmit_time;
1770
1771        skb_dst_set(skb, dst_clone(&rt->dst));
1772        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1773        if (proto == IPPROTO_ICMPV6) {
1774                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1775
1776                ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1777                ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1778        }
1779
1780        ip6_cork_release(cork, v6_cork);
1781out:
1782        return skb;
1783}
1784
1785int ip6_send_skb(struct sk_buff *skb)
1786{
1787        struct net *net = sock_net(skb->sk);
1788        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1789        int err;
1790
1791        err = ip6_local_out(net, skb->sk, skb);
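        /* A positive return value is a congestion notification from the
         * lower layers: net_xmit_errno() maps NET_XMIT_CN to success and
         * any other positive code to -ENOBUFS.
         */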
1792        if (err) {
1793                if (err > 0)
1794                        err = net_xmit_errno(err);
1795                if (err)
1796                        IP6_INC_STATS(net, rt->rt6i_idev,
1797                                      IPSTATS_MIB_OUTDISCARDS);
1798        }
1799
1800        return err;
1801}
1802
1803int ip6_push_pending_frames(struct sock *sk)
1804{
1805        struct sk_buff *skb;
1806
1807        skb = ip6_finish_skb(sk);
1808        if (!skb)
1809                return 0;
1810
1811        return ip6_send_skb(skb);
1812}
1813EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1814
1815static void __ip6_flush_pending_frames(struct sock *sk,
1816                                       struct sk_buff_head *queue,
1817                                       struct inet_cork_full *cork,
1818                                       struct inet6_cork *v6_cork)
1819{
1820        struct sk_buff *skb;
1821
1822        while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1823                if (skb_dst(skb))
1824                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1825                                      IPSTATS_MIB_OUTDISCARDS);
1826                kfree_skb(skb);
1827        }
1828
1829        ip6_cork_release(cork, v6_cork);
1830}
1831
1832void ip6_flush_pending_frames(struct sock *sk)
1833{
1834        __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1835                                   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1836}
1837EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1838
1839struct sk_buff *ip6_make_skb(struct sock *sk,
1840                             int getfrag(void *from, char *to, int offset,
1841                                         int len, int odd, struct sk_buff *skb),
1842                             void *from, int length, int transhdrlen,
1843                             struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1844                             struct rt6_info *rt, unsigned int flags,
1845                             struct inet_cork_full *cork)
1846{
1847        struct inet6_cork v6_cork;
1848        struct sk_buff_head queue;
1849        int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1850        int err;
1851
1852        if (flags & MSG_PROBE)
1853                return NULL;
1854
1855        __skb_queue_head_init(&queue);
1856
1857        cork->base.flags = 0;
1858        cork->base.addr = 0;
1859        cork->base.opt = NULL;
1860        cork->base.dst = NULL;
1861        v6_cork.opt = NULL;
1862        err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1863        if (err) {
1864                ip6_cork_release(cork, &v6_cork);
1865                return ERR_PTR(err);
1866        }
1867        if (ipc6->dontfrag < 0)
1868                ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1869
1870        err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1871                                &current->task_frag, getfrag, from,
1872                                length + exthdrlen, transhdrlen + exthdrlen,
1873                                flags, ipc6);
1874        if (err) {
1875                __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1876                return ERR_PTR(err);
1877        }
1878
1879        return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1880}
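
/*
 * Usage sketch (illustrative): the cork-less fast path, as a datagram
 * protocol might use it when no corking is in effect.  cork lives on the
 * caller's stack and send_skb() stands in for the protocol's own
 * transmit helper:
 *
 *	struct inet_cork_full cork;
 *	struct sk_buff *skb;
 *
 *	skb = ip6_make_skb(sk, ip_generic_getfrag, msg, ulen,
 *			   sizeof(struct udphdr), &ipc6, &fl6,
 *			   (struct rt6_info *)dst, msg->msg_flags, &cork);
 *	err = PTR_ERR(skb);
 *	if (!IS_ERR_OR_NULL(skb))
 *		err = send_skb(skb, &fl6, &cork.base);
 */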
1881