linux/net/ipv6/ip6_output.c
/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Changes:
 *      A.N.Kuznetsov   :       arithmetic in fragmentation.
 *                              extension headers are implemented.
 *                              route changes now work.
 *                              ip6_forward does not confuse sniffers.
 *                              etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *      Imran Patel     :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                      :       add ip6_append_data and related functions
 *                              for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

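/* Last step of the output path: resolve the neighbour entry for the
 * nexthop and hand the skb to the neighbour layer. Multicast packets
 * are additionally looped back to local listeners (via
 * dev_loopback_xmit) when required, and multicast of node-local scope
 * is never sent on the wire.
 */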
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        const struct in6_addr *nexthop;
        struct neighbour *neigh;
        int ret;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
                    ((mroute6_is_socket(net, skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                         * is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        net, sk, newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(net, idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

                if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
                    IPV6_ADDR_SCOPE_NODELOCAL &&
                    !(dev->flags & IFF_LOOPBACK)) {
                        kfree_skb(skb);
                        return 0;
                }
        }

        if (lwtunnel_xmit_redirect(dst->lwtstate)) {
                int res = lwtunnel_xmit(skb);

                if (res < 0 || res == LWTUNNEL_XMIT_DONE)
                        return res;
        }

        rcu_read_lock_bh();
        nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
        neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
        if (unlikely(!neigh))
                neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
        if (!IS_ERR(neigh)) {
                sock_confirm_neigh(skb, neigh);
                ret = neigh_output(neigh, skb);
                rcu_read_unlock_bh();
                return ret;
        }
        rcu_read_unlock_bh();

        IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

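/* Decide whether the packet can go out as-is or must be fragmented:
 * anything larger than the path MTU that is not GSO, any dst that
 * requires fragmentation (dst_allfrag), or anything exceeding the
 * recorded frag_max_size goes through ip6_fragment().
 */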
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
        /* Policy lookup after SNAT yielded a new policy */
        if (skb_dst(skb)->xfrm) {
                IPCB(skb)->flags |= IPSKB_REROUTED;
                return dst_output(net, sk, skb);
        }
#endif

        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)) ||
            (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
                return ip6_fragment(net, sk, skb, ip6_finish_output2);
        else
                return ip6_finish_output2(net, sk, skb);
}

static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        int ret;

        ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
        switch (ret) {
        case NET_XMIT_SUCCESS:
                return __ip6_finish_output(net, sk, skb);
        case NET_XMIT_CN:
                return __ip6_finish_output(net, sk, skb) ? : ret;
        default:
                kfree_skb(skb);
                return ret;
        }
}

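/* Entry point from dst_output() for locally generated and forwarded
 * packets. Drops everything when IPv6 is administratively disabled on
 * the egress device; otherwise runs the NF_INET_POST_ROUTING hook
 * (unless the packet was already rerouted) before ip6_finish_output().
 */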
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                            net, sk, skb, indev, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
        if (!np->autoflowlabel_set)
                return ip6_default_np_autolabel(net);
        else
                return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note: the socket lock is not held for SYNACK packets, but the socket
 * might still be modified by calls to skb_set_owner_w() and
 * ipv6_local_error(), which use proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             __u32 mark, struct ipv6_txoptions *opt, int tclass)
{
        struct net *net = sock_net(sk);
        const struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        unsigned int head_room;
        struct ipv6hdr *hdr;
        u8  proto = fl6->flowi6_proto;
        int seg_len = skb->len;
        int hlimit = -1;
        u32 mtu;

        head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
        if (opt)
                head_room += opt->opt_nflen + opt->opt_flen;

        if (unlikely(skb_headroom(skb) < head_room)) {
                struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                if (!skb2) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                        kfree_skb(skb);
                        return -ENOBUFS;
                }
                if (skb->sk)
                        skb_set_owner_w(skb2, skb->sk);
                consume_skb(skb);
                skb = skb2;
        }

        if (opt) {
                seg_len += opt->opt_nflen + opt->opt_flen;

                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);

                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
                                             &fl6->saddr);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
                                ip6_autoflowlabel(net, np), fl6));

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        hdr->saddr = fl6->saddr;
        hdr->daddr = *first_hop;

        skb->protocol = htons(ETH_P_IPV6);
        skb->priority = sk->sk_priority;
        skb->mark = mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_OUT, skb->len);

                /* if egress device is enslaved to an L3 master device pass the
                 * skb to its handler for processing
                 */
                skb = l3mdev_ip6_out((struct sock *)sk, skb);
                if (unlikely(!skb))
                        return 0;

                /* hooks should never assume socket lock is held.
                 * we promote our socket to non const
                 */
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
                               net, (struct sock *)sk, skb, NULL, dst->dev,
                               dst_output);
        }

        skb->dev = dst->dev;
        /* ipv6_local_error() does not require socket lock,
         * we promote our socket to non const
         */
        ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

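/* Deliver a Router Alert packet to every raw socket registered on
 * ip6_ra_chain (via the IPV6_ROUTER_ALERT socket option) whose selector
 * matches; the last match consumes the original skb, earlier matches
 * get clones. Returns 1 if the packet was delivered and thus consumed,
 * 0 otherwise.
 */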
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

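/* Called for packets whose destination matches a proxied neighbour
 * entry. Returns 1 when the packet is an NDISC message that must be
 * handled locally, -1 when it must be dropped (link-local destination),
 * and 0 when normal forwarding may proceed.
 */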
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        __be16 frag_off;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* A unicast neighbor discovery message destined
                         * to the proxied address is passed to the input
                         * function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
                                     struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
        if (skb->offload_l3_fwd_mark) {
                consume_skb(skb);
                return 0;
        }
#endif

        skb->tstamp = 0;
        return dst_output(net, sk, skb);
}

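/* Returns true when the packet cannot be sent at the given MTU: it is
 * larger than the MTU and is neither covered by a smaller reassembled
 * fragment size, nor allowed to be fragmented locally (ignore_df), nor
 * a GSO packet whose segments individually fit.
 */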
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
        if (skb->len <= mtu)
                return false;

        /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
        if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
                return true;

        if (skb->ignore_df)
                return false;

        if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
                return false;

        return true;
}

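/* Forwarding path proper: validates that forwarding is enabled and the
 * packet is eligible, handles Router Alert and NDISC proxying,
 * decrements the hop limit, emits Time Exceeded / Packet Too Big /
 * redirect ICMPs as needed, and finally passes the packet through
 * NF_INET_FORWARD to ip6_forward_finish().
 */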
int ip6_forward(struct sk_buff *skb)
{
        struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        if (unlikely(skb->sk))
                goto drop;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        skb_forward_csum(skb);

        /*
         *      We DO NOT make any processing on
         *      RA packets, pushing them to user level AS IS
         *      without any WARRANTY that application will be able
         *      to interpret them. The reason is that we
         *      cannot make anything clever here.
         *
         *      We are not end-node, so that if packet contains
         *      AH/ESP, we cannot make anything.
         *      Defragmentation also would be a mistake; RA packets
         *      cannot be fragmented, because there is no warranty
         *      that different fragments will go along one path. --ANK
         */
        if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
                if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
                        return 0;
        }

        /*
         *      check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                /* Force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* IPv6 specs say nothing about it, but it is clear that we cannot
         * send redirects to source routed frames.
         * We don't send redirects to frames decapsulated from IPsec.
         */
        if (IP6CB(skb)->iif == dst->dev->ifindex &&
            opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct inet_peer *peer;
                struct rt6_info *rt;

                /*
                 *      incoming and outgoing devices are the same
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if (rt->rt6i_flags & RTF_GATEWAY)
                        target = &rt->rt6i_gateway;
                else
                        target = &hdr->daddr;

                peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

                /* Limit redirects both by destination (here)
                 * and by source (inside ndisc_send_redirect)
                 */
                if (inet_peer_xrlim_allow(peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
                if (peer)
                        inet_putpeer(peer);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = ip6_dst_mtu_forward(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (ip6_pkt_too_big(skb, mtu)) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Mangling hops number delayed to point after skb COW */

        hdr->hop_limit--;

        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
                       net, NULL, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

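/* Propagate per-packet metadata (dst, device, mark, priority, hash,
 * netfilter and security state, ...) from the original skb to a newly
 * allocated fragment so each fragment is handled like the original.
 */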
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

        skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
        skb_ext_copy(to, from);
        skb_copy_secmark(to, from);
}

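/* Split an oversized skb into fragments and emit each one via @output.
 * The fast path reuses the existing frag_list when its geometry already
 * matches the fragment size; otherwise the slow path allocates and
 * copies each fragment. Either way every fragment carries a fragment
 * header with a shared identification value.
 */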
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
                                inet6_sk(skb->sk) : NULL;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len, nexthdr_offset;
        int hroom, troom;
        __be32 frag_id;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;
        ktime_t tstamp = skb->tstamp;

        err = ip6_find_1stfragopt(skb, &prevhdr);
        if (err < 0)
                goto fail;
        hlen = err;
        nexthdr = *prevhdr;
        nexthdr_offset = prevhdr - skb_network_header(skb);

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb is not generated by a local socket.
         */
        if (unlikely(!skb->ignore_df && skb->len > mtu))
                goto fail_toobig;

        if (IP6CB(skb)->frag_max_size) {
                if (IP6CB(skb)->frag_max_size > mtu)
                        goto fail_toobig;

                /* don't send fragments larger than what we received */
                mtu = IP6CB(skb)->frag_max_size;
                if (mtu < IPV6_MIN_MTU)
                        mtu = IPV6_MIN_MTU;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < hlen + sizeof(struct frag_hdr) + 8)
                goto fail_toobig;
        mtu -= hlen + sizeof(struct frag_hdr);

        frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
                                    &ipv6_hdr(skb)->saddr);

        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_checksum_help(skb)))
                goto fail;

        prevhdr = skb_network_header(skb) + nexthdr_offset;
        hroom = LL_RESERVED_SPACE(rt->dst.dev);
        if (skb_has_frag_list(skb)) {
                unsigned int first_len = skb_pagelen(skb);
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb) ||
                    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = 0;
                offset = 0;
                /* BUILD HEADER */

                *prevhdr = NEXTHDR_FRAGMENT;
                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        err = -ENOMEM;
                        goto fail;
                }
                frag = skb_shinfo(skb)->frag_list;
                skb_frag_list_init(skb);

                __skb_pull(skb, hlen);
                fh = __skb_push(skb, sizeof(struct frag_hdr));
                __skb_push(skb, hlen);
                skb_reset_network_header(skb);
                memcpy(skb_network_header(skb), tmp_hdr, hlen);

                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                fh->identification = frag_id;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->len = first_len;
                ipv6_hdr(skb)->payload_len = htons(first_len -
                                                   sizeof(struct ipv6hdr));

                for (;;) {
                        /* Prepare header of the next frame,
                         * before previous one went down. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                skb_reset_transport_header(frag);
                                fh = __skb_push(frag, sizeof(struct frag_hdr));
                                __skb_push(frag, hlen);
                                skb_reset_network_header(frag);
                                memcpy(skb_network_header(frag), tmp_hdr,
                                       hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                ipv6_hdr(frag)->payload_len =
                                                htons(frag->len -
                                                      sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        skb->tstamp = tstamp;
                        err = output(net, sk, skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb_mark_not_on_list(skb);
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        return 0;
                }

                kfree_skb_list(frag);

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        left = skb->len - hlen;         /* Space per frame */
        ptr = hlen;                     /* Where to start from */

        /*
         *      Fragment the datagram.
         */

        troom = rt->dst.dev->needed_tailroom;

        /*
         *      Keep copying data until we run out.
         */
        while (left > 0)        {
                u8 *fragnexthdr_offset;

                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet end
                 * then align the next start on an eight byte boundary */
                if (len < left) {
                        len &= ~7;
                }

                /* Allocate buffer */
                frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                                 hroom + troom, GFP_ATOMIC);
                if (!frag) {
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *      Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, hroom);
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                skb_reset_network_header(frag);
                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
                frag->transport_header = (frag->network_header + hlen +
                                          sizeof(struct frag_hdr));

                /*
                 *      Charge the memory for the fragment to any owner
                 *      it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 *      Copy the packet header into the new buffer.
                 */
                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

                fragnexthdr_offset = skb_network_header(frag);
                fragnexthdr_offset += prevhdr - skb_network_header(skb);
                *fragnexthdr_offset = NEXTHDR_FRAGMENT;

                /*
                 *      Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->identification = frag_id;

                /*
                 *      Copy a block of the IP datagram.
                 */
                BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
                                     len));
                left -= len;

                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                ipv6_hdr(frag)->payload_len = htons(frag->len -
                                                    sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 *      Put this fragment into the sending queue.
                 */
                frag->tstamp = tstamp;
                err = output(net, sk, frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        consume_skb(skb);
        return err;

fail_toobig:
        if (skb->sk && dst_allfrag(skb_dst(skb)))
                sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        err = -EMSGSIZE;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

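/* Helper for ip6_sk_dst_check(): returns non-zero when the cached route
 * key no longer matches the flow address, i.e. it is neither the exact
 * host route for fl_addr nor validated by the saved addr_cache pointer.
 */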
static inline int ip6_rt_check(const struct rt6key *rt_key,
                               const struct in6_addr *fl_addr,
                               const struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          const struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt;

        if (!dst)
                goto out;

        if (dst->ops->family != AF_INET6) {
                dst_release(dst);
                return NULL;
        }

        rt = (struct rt6_info *)dst;
        /* Yes, checking route validity in the unconnected
         * case is not very simple. Take into account
         * that we do not support routing by source, TOS,
         * and MSG_DONTROUTE            --ANK (980726)
         *
         * 1. ip6_rt_check(): If route was host route,
         *    check that cached destination is current.
         *    If it is network route, we still may
         *    check its validity using saved pointer
         *    to the last used address: daddr_cache.
         *    We do not want to save whole address now,
         *    (because main consumer of this service
         *    is tcp, which does not have this problem),
         *    so that the last trick works only on connected
         *    sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
           (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
              (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

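/* Core of the dst lookup helpers: fills in a source address when the
 * flow has none, performs the route lookup, and (with optimistic DAD
 * enabled) re-routes via the default router when the chosen source
 * address is still OPTIMISTIC and the nexthop neighbour is not yet
 * valid. Also rejects mixed v4-mapped/v6 address pairs.
 */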
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        struct neighbour *n;
        struct rt6_info *rt;
#endif
        int err;
        int flags = 0;

        /* The correct way to handle this would be to do
         * ip6_route_get_saddr, and then ip6_route_output; however,
         * the route-specific preferred source forces the
         * ip6_route_output call _before_ ip6_route_get_saddr.
         *
         * In source specific routing (no src=any default route),
         * ip6_route_output will fail given src=any saddr, though, so
         * that's why we try it again later.
         */
        if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
                struct fib6_info *from;
                struct rt6_info *rt;
                bool had_dst = *dst != NULL;

                if (!had_dst)
                        *dst = ip6_route_output(net, sk, fl6);
                rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

                rcu_read_lock();
                from = rt ? rcu_dereference(rt->from) : NULL;
                err = ip6_route_get_saddr(net, from, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                rcu_read_unlock();

                if (err)
                        goto out_err_release;

                /* If we had an erroneous initial result, pretend it
                 * never existed and let the SA-enabled version take
                 * over.
                 */
                if (!had_dst && (*dst)->error) {
                        dst_release(*dst);
                        *dst = NULL;
                }

                if (fl6->flowi6_oif)
                        flags |= RT6_LOOKUP_F_IFACE;
        }

        if (!*dst)
                *dst = ip6_route_output_flags(net, sk, fl6, flags);

        err = (*dst)->error;
        if (err)
                goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * Here if the dst entry we've looked up
         * has a neighbour entry that is in the INCOMPLETE
         * state and the src address from the flow is
         * marked as OPTIMISTIC, we release the found
         * dst entry and replace it instead with the
         * dst entry of the nexthop router
         */
        rt = (struct rt6_info *) *dst;
        rcu_read_lock_bh();
        n = __ipv6_neigh_lookup_noref(rt->dst.dev,
                                      rt6_nexthop(rt, &fl6->daddr));
        err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
        rcu_read_unlock_bh();

        if (err) {
                struct inet6_ifaddr *ifp;
                struct flowi6 fl_gw6;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw6);
                        err = (*dst)->error;
                        if (err)
                                goto out_err_release;
                }
        }
#endif
        if (ipv6_addr_v4mapped(&fl6->saddr) &&
            !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
                err = -EAFNOSUPPORT;
                goto out_err_release;
        }

        return 0;

out_err_release:
        dst_release(*dst);
        *dst = NULL;

        if (err == -ENETUNREACH)
                IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        return err;
}

/**
 *      ip6_dst_lookup - perform route lookup on flow
 *      @net: Network namespace to perform lookup in
 *      @sk: socket which provides route info
 *      @dst: pointer to dst_entry * for result
 *      @fl6: flow to lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
                   struct flowi6 *fl6)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *      @net: Network namespace to perform lookup in
 *      @sk: socket which provides route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst)
{
        struct dst_entry *dst = NULL;
        int err;

        err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;

        return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *      @sk: socket which provides the dst cache and route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @connected: whether @sk is connected or not
 *
 *      This function performs a route lookup on the given flow with the
 *      possibility of using the cached route in the socket if it is valid.
 *      It will take the socket dst lock when operating on the dst cache.
 *      As a result, this function can only be used in process context.
 *
 *      In addition, for a connected socket, cache the dst in the socket
 *      if the current cache is not valid.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                         const struct in6_addr *final_dst,
                                         bool connected)
{
        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

        dst = ip6_sk_dst_check(sk, dst, fl6);
        if (dst)
                return dst;

        dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
        if (connected && !IS_ERR(dst))
                ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

        return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

/**
 *      ip6_dst_lookup_tunnel - perform route lookup on tunnel
 *      @skb: Packet for which lookup is done
 *      @dev: Tunnel device
 *      @net: Network namespace of tunnel device
 *      @sock: Socket which provides route info
 *      @saddr: Memory to store the src ip address
 *      @info: Tunnel information
 *      @protocol: IP protocol
 *      @use_cache: Flag to enable cache usage
 *
 *      This function performs a route lookup on a tunnel.
 *
 *      It returns a valid dst pointer and stores the src address to be
 *      used in the tunnel in param saddr on success, else a pointer
 *      encoded error code.
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
                                        struct net_device *dev,
                                        struct net *net,
                                        struct socket *sock,
                                        struct in6_addr *saddr,
                                        const struct ip_tunnel_info *info,
                                        u8 protocol,
                                        bool use_cache)
{
        struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
        struct dst_cache *dst_cache;
#endif
        struct flowi6 fl6;
        __u8 prio;

#ifdef CONFIG_DST_CACHE
        dst_cache = (struct dst_cache *)&info->dst_cache;
        if (use_cache) {
                dst = dst_cache_get_ip6(dst_cache, saddr);
                if (dst)
                        return dst;
        }
#endif
        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_mark = skb->mark;
        fl6.flowi6_proto = protocol;
        fl6.daddr = info->key.u.ipv6.dst;
        fl6.saddr = info->key.u.ipv6.src;
        prio = info->key.tos;
        fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
                                          info->key.label);

        dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
                                              NULL);
        if (IS_ERR(dst)) {
                netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
                return ERR_PTR(-ENETUNREACH);
        }
        if (dst->dev == dev) { /* is this necessary? */
                netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
                dst_release(dst);
                return ERR_PTR(-ELOOP);
        }
#ifdef CONFIG_DST_CACHE
        if (use_cache)
                dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
        *saddr = fl6.saddr;
        return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

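/* Recompute *mtu and *maxfraglen while appending data: the first
 * fragment must reserve the dst header_len, while for later fragments
 * that header space may be used as data space. Routes flagged
 * DST_XFRM_TUNNEL are left untouched.
 */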
static void ip6_append_data_mtu(unsigned int *mtu,
                                int *maxfraglen,
                                unsigned int fragheaderlen,
                                struct sk_buff *skb,
                                struct rt6_info *rt,
                                unsigned int orig_mtu)
{
        if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
                if (!skb) {
                        /* first fragment, reserve header_len */
                        *mtu = orig_mtu - rt->dst.header_len;

                } else {
                        /*
                         * this fragment is not the first, so the header
                         * space is regarded as data space.
                         */
                        *mtu = orig_mtu;
                }
                *maxfraglen = ((*mtu - fragheaderlen) & ~7)
                              + fragheaderlen - sizeof(struct frag_hdr);
        }
}

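/* Prepare the cork for a corked send: duplicate the supplied extension
 * headers into v6_cork (they must outlive the caller's option buffer),
 * pin the route, and derive the fragment size from the path MTU policy
 * and the per-socket frag_size override.
 */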
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
                          struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
                          struct rt6_info *rt, struct flowi6 *fl6,
                          const struct sockcm_cookie *sockc)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        unsigned int mtu;
        struct ipv6_txoptions *opt = ipc6->opt;

        /*
         * setup for corking
         */
        if (opt) {
                if (WARN_ON(v6_cork->opt))
                        return -EINVAL;

                v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
                if (unlikely(!v6_cork->opt))
                        return -ENOBUFS;

                v6_cork->opt->tot_len = sizeof(*opt);
                v6_cork->opt->opt_flen = opt->opt_flen;
                v6_cork->opt->opt_nflen = opt->opt_nflen;

                v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                    sk->sk_allocation);
                if (opt->dst0opt && !v6_cork->opt->dst0opt)
                        return -ENOBUFS;

                v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                    sk->sk_allocation);
                if (opt->dst1opt && !v6_cork->opt->dst1opt)
                        return -ENOBUFS;

                v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                   sk->sk_allocation);
                if (opt->hopopt && !v6_cork->opt->hopopt)
                        return -ENOBUFS;

                v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                    sk->sk_allocation);
                if (opt->srcrt && !v6_cork->opt->srcrt)
                        return -ENOBUFS;

                /* need source address above miyazawa */
        }
        dst_hold(&rt->dst);
        cork->base.dst = &rt->dst;
        cork->fl.u.ip6 = *fl6;
        v6_cork->hop_limit = ipc6->hlimit;
        v6_cork->tclass = ipc6->tclass;
        if (rt->dst.flags & DST_XFRM_TUNNEL)
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
        else
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                        READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
        if (np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < IPV6_MIN_MTU)
                return -EINVAL;
        cork->base.fragsize = mtu;
        cork->base.gso_size = sk->sk_type == SOCK_DGRAM &&
                              sk->sk_protocol == IPPROTO_UDP ? ipc6->gso_size : 0;

        if (dst_allfrag(xfrm_dst_path(&rt->dst)))
                cork->base.flags |= IPCORK_ALLFRAG;
        cork->base.length = 0;

        cork->base.transmit_time = sockc->transmit_time;

        return 0;
}

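/* Workhorse behind ip6_append_data()/ip6_make_skb(): appends @length
 * bytes pulled in via @getfrag to the queue, growing or allocating skbs
 * so that each one, once headers are added, fits the corked fragment
 * size. Enforces the RFC 7112 rule that the whole header chain fits in
 * the first fragment.
 */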
1327static int __ip6_append_data(struct sock *sk,
1328                             struct flowi6 *fl6,
1329                             struct sk_buff_head *queue,
1330                             struct inet_cork *cork,
1331                             struct inet6_cork *v6_cork,
1332                             struct page_frag *pfrag,
1333                             int getfrag(void *from, char *to, int offset,
1334                                         int len, int odd, struct sk_buff *skb),
1335                             void *from, int length, int transhdrlen,
1336                             unsigned int flags, struct ipcm6_cookie *ipc6,
1337                             const struct sockcm_cookie *sockc)
1338{
1339        struct sk_buff *skb, *skb_prev = NULL;
1340        unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1341        int exthdrlen = 0;
1342        int dst_exthdrlen = 0;
1343        int hh_len;
1344        int copy;
1345        int err;
1346        int offset = 0;
1347        __u8 tx_flags = 0;
1348        u32 tskey = 0;
1349        struct rt6_info *rt = (struct rt6_info *)cork->dst;
1350        struct ipv6_txoptions *opt = v6_cork->opt;
1351        int csummode = CHECKSUM_NONE;
1352        unsigned int maxnonfragsize, headersize;
1353        unsigned int wmem_alloc_delta = 0;
1354        bool paged;
1355
1356        skb = skb_peek_tail(queue);
1357        if (!skb) {
1358                exthdrlen = opt ? opt->opt_flen : 0;
1359                dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1360        }
1361
1362        paged = !!cork->gso_size;
1363        mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1364        orig_mtu = mtu;
1365
1366        hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1367
1368        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1369                        (opt ? opt->opt_nflen : 0);
1370        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1371                     sizeof(struct frag_hdr);
1372
1373        headersize = sizeof(struct ipv6hdr) +
1374                     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1375                     (dst_allfrag(&rt->dst) ?
1376                      sizeof(struct frag_hdr) : 0) +
1377                     rt->rt6i_nfheader_len;
1378
1379        /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1380         * the first fragment
1381         */
1382        if (headersize + transhdrlen > mtu)
1383                goto emsgsize;
1384
1385        if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1386            (sk->sk_protocol == IPPROTO_UDP ||
1387             sk->sk_protocol == IPPROTO_RAW)) {
1388                ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1389                                sizeof(struct ipv6hdr));
1390                goto emsgsize;
1391        }
1392
1393        if (ip6_sk_ignore_df(sk))
1394                maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1395        else
1396                maxnonfragsize = mtu;
1397
1398        if (cork->length + length > maxnonfragsize - headersize) {
1399emsgsize:
1400                pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1401                ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1402                return -EMSGSIZE;
1403        }
1404
1405        /* CHECKSUM_PARTIAL only with no extension headers and when
1406         * we are not going to fragment
1407         */
1408        if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1409            headersize == sizeof(struct ipv6hdr) &&
1410            length <= mtu - headersize &&
1411            (!(flags & MSG_MORE) || cork->gso_size) &&
1412            rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1413                csummode = CHECKSUM_PARTIAL;
1414
1415        if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
1416                sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
1417                if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
1418                    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1419                        tskey = sk->sk_tskey++;
1420        }
1421
1422        /*
1423         * Let's try using as much space as possible.
1424         * Use MTU if total length of the message fits into the MTU.
1425         * Otherwise, we need to reserve fragment header and
1426         * fragment alignment (= 8-15 octets, in total).
1427         *
1428         * Note that we may need to "move" the data from the tail
1429         * of the buffer to the new fragment when we split
1430         * the message.
1431         *
1432         * FIXME: It may be fragmented into multiple chunks
1433         *        at once if non-fragmentable extension headers
1434         *        are too large.
1435         * --yoshfuji
1436         */
1437
1438        cork->length += length;
1439        if (!skb)
1440                goto alloc_new_skb;
1441
1442        while (length > 0) {
1443                /* Check if the remaining data fits into the current packet. */
1444                copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1445                if (copy < length)
1446                        copy = maxfraglen - skb->len;
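
                    /*
                     * copy > 0: that many bytes still fit in the tail skb;
                     * copy == 0: the tail skb is exactly full;
                     * copy < 0: the tail skb already overruns maxfraglen,
                     * and the excess (fraggap below) must be moved into a
                     * newly allocated skb.
                     */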
1447
1448                if (copy <= 0) {
1449                        char *data;
1450                        unsigned int datalen;
1451                        unsigned int fraglen;
1452                        unsigned int fraggap;
1453                        unsigned int alloclen;
1454                        unsigned int pagedlen;
1455alloc_new_skb:
1456                        /* There's no room in the current skb */
1457                        if (skb)
1458                                fraggap = skb->len - maxfraglen;
1459                        else
1460                                fraggap = 0;
1461                        /* update mtu and maxfraglen if necessary */
1462                        if (!skb || !skb_prev)
1463                                ip6_append_data_mtu(&mtu, &maxfraglen,
1464                                                    fragheaderlen, skb, rt,
1465                                                    orig_mtu);
1466
1467                        skb_prev = skb;
1468
1469                        /*
1470                         * If the remaining data exceeds the MTU,
1471                         * we know we need more fragment(s).
1472                         */
1473                        datalen = length + fraggap;
1474
1475                        if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1476                                datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1477                        fraglen = datalen + fragheaderlen;
1478                        pagedlen = 0;
1479
1480                        if ((flags & MSG_MORE) &&
1481                            !(rt->dst.dev->features&NETIF_F_SG))
1482                                alloclen = mtu;
1483                        else if (!paged)
1484                                alloclen = fraglen;
1485                        else {
1486                                alloclen = min_t(int, fraglen, MAX_HEADER);
1487                                pagedlen = fraglen - alloclen;
1488                        }
1489
1490                        alloclen += dst_exthdrlen;
1491
1492                        if (datalen != length + fraggap) {
1493                                /*
1494                                 * This is not the last fragment; the trailer
1495                                 * space is regarded as data space.
1496                                 */
1497                                datalen += rt->dst.trailer_len;
1498                        }
1499
1500                        alloclen += rt->dst.trailer_len;
1501                        fraglen = datalen + fragheaderlen;
1502
1503                        /*
1504                         * We just reserve space for the fragment header.
1505                         * Note: this may be an overallocation if the message
1506                         * (without MSG_MORE) fits into the MTU.
1507                         */
1508                        alloclen += sizeof(struct frag_hdr);
1509
1510                        copy = datalen - transhdrlen - fraggap - pagedlen;
1511                        if (copy < 0) {
1512                                err = -EINVAL;
1513                                goto error;
1514                        }
1515                        if (transhdrlen) {
1516                                skb = sock_alloc_send_skb(sk,
1517                                                alloclen + hh_len,
1518                                                (flags & MSG_DONTWAIT), &err);
1519                        } else {
1520                                skb = NULL;
1521                                if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1522                                    2 * sk->sk_sndbuf)
1523                                        skb = alloc_skb(alloclen + hh_len,
1524                                                        sk->sk_allocation);
1525                                if (unlikely(!skb))
1526                                        err = -ENOBUFS;
1527                        }
1528                        if (!skb)
1529                                goto error;
1530                        /*
1531                         *      Fill in the control structures
1532                         */
1533                        skb->protocol = htons(ETH_P_IPV6);
1534                        skb->ip_summed = csummode;
1535                        skb->csum = 0;
1536                        /* Reserve room for the fragment header and any IPsec headers. */
1537                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1538                                    dst_exthdrlen);
1539
1540                        /* Only the initial fragment is timestamped. */
1541                        skb_shinfo(skb)->tx_flags = tx_flags;
1542                        tx_flags = 0;
1543                        skb_shinfo(skb)->tskey = tskey;
1544                        tskey = 0;
1545
1546                        /*
1547                         *      Find where to start putting bytes
1548                         */
1549                        data = skb_put(skb, fraglen - pagedlen);
1550                        skb_set_network_header(skb, exthdrlen);
1551                        data += fragheaderlen;
1552                        skb->transport_header = (skb->network_header +
1553                                                 fragheaderlen);
1554                        if (fraggap) {
1555                                skb->csum = skb_copy_and_csum_bits(
1556                                        skb_prev, maxfraglen,
1557                                        data + transhdrlen, fraggap, 0);
1558                                skb_prev->csum = csum_sub(skb_prev->csum,
1559                                                          skb->csum);
1560                                data += fraggap;
1561                                pskb_trim_unique(skb_prev, maxfraglen);
1562                        }
1563                        if (copy > 0 &&
1564                            getfrag(from, data + transhdrlen, offset,
1565                                    copy, fraggap, skb) < 0) {
1566                                err = -EFAULT;
1567                                kfree_skb(skb);
1568                                goto error;
1569                        }
1570
1571                        offset += copy;
1572                        length -= copy + transhdrlen;
1573                        transhdrlen = 0;
1574                        exthdrlen = 0;
1575                        dst_exthdrlen = 0;
1576
1577                        if ((flags & MSG_CONFIRM) && !skb_prev)
1578                                skb_set_dst_pending_confirm(skb, 1);
1579
1580                        /*
1581                         * Put the packet on the pending queue
1582                         */
1583                        if (!skb->destructor) {
1584                                skb->destructor = sock_wfree;
1585                                skb->sk = sk;
1586                                wmem_alloc_delta += skb->truesize;
1587                        }
1588                        __skb_queue_tail(queue, skb);
1589                        continue;
1590                }
1591
1592                if (copy > length)
1593                        copy = length;
1594
1595                if (!(rt->dst.dev->features&NETIF_F_SG) &&
1596                    skb_tailroom(skb) >= copy) {
1597                        unsigned int off;
1598
1599                        off = skb->len;
1600                        if (getfrag(from, skb_put(skb, copy),
1601                                                offset, copy, off, skb) < 0) {
1602                                __skb_trim(skb, off);
1603                                err = -EFAULT;
1604                                goto error;
1605                        }
1606                } else {
1607                        int i = skb_shinfo(skb)->nr_frags;
1608
1609                        err = -ENOMEM;
1610                        if (!sk_page_frag_refill(sk, pfrag))
1611                                goto error;
1612
1613                        if (!skb_can_coalesce(skb, i, pfrag->page,
1614                                              pfrag->offset)) {
1615                                err = -EMSGSIZE;
1616                                if (i == MAX_SKB_FRAGS)
1617                                        goto error;
1618
1619                                __skb_fill_page_desc(skb, i, pfrag->page,
1620                                                     pfrag->offset, 0);
1621                                skb_shinfo(skb)->nr_frags = ++i;
1622                                get_page(pfrag->page);
1623                        }
1624                        copy = min_t(int, copy, pfrag->size - pfrag->offset);
1625                        if (getfrag(from,
1626                                    page_address(pfrag->page) + pfrag->offset,
1627                                    offset, copy, skb->len, skb) < 0)
1628                                goto error_efault;
1629
1630                        pfrag->offset += copy;
1631                        skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1632                        skb->len += copy;
1633                        skb->data_len += copy;
1634                        skb->truesize += copy;
1635                        wmem_alloc_delta += copy;
1636                }
1637                offset += copy;
1638                length -= copy;
1639        }
1640
1641        if (wmem_alloc_delta)
1642                refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1643        return 0;
1644
1645error_efault:
1646        err = -EFAULT;
1647error:
1648        cork->length -= length;
1649        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1650        refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1651        return err;
1652}
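
    /*
     * Fragment geometry used by __ip6_append_data(), for reference: with
     * mtu = 1500 and no extension headers, fragheaderlen = 40 and
     * maxfraglen = ((1500 - 40) & ~7) + 40 - 8 = 1488, so each non-final
     * fragment carries 1448 bytes of payload after the 8-byte fragment
     * header (1496 bytes on the wire).  1448 is a multiple of 8, as
     * RFC 8200 requires for every fragment except the last.
     */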
1653
1654int ip6_append_data(struct sock *sk,
1655                    int getfrag(void *from, char *to, int offset, int len,
1656                                int odd, struct sk_buff *skb),
1657                    void *from, int length, int transhdrlen,
1658                    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1659                    struct rt6_info *rt, unsigned int flags,
1660                    const struct sockcm_cookie *sockc)
1661{
1662        struct inet_sock *inet = inet_sk(sk);
1663        struct ipv6_pinfo *np = inet6_sk(sk);
1664        int exthdrlen;
1665        int err;
1666
1667        if (flags&MSG_PROBE)
1668                return 0;
1669        if (skb_queue_empty(&sk->sk_write_queue)) {
1670                /*
1671                 * Set up for corking.
1672                 */
1673                err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1674                                     ipc6, rt, fl6, sockc);
1675                if (err)
1676                        return err;
1677
1678                exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1679                length += exthdrlen;
1680                transhdrlen += exthdrlen;
1681        } else {
1682                fl6 = &inet->cork.fl.u.ip6;
1683                transhdrlen = 0;
1684        }
1685
1686        return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1687                                 &np->cork, sk_page_frag(sk), getfrag,
1688                                 from, length, transhdrlen, flags, ipc6, sockc);
1689}
1690EXPORT_SYMBOL_GPL(ip6_append_data);
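
    /*
     * A minimal sketch of the corked send path, modelled loosely on the
     * UDPv6 and raw-socket sendmsg code (ip_generic_getfrag() is the stock
     * getfrag callback for copying from an iov_iter; dst, fl6, ipc6 and
     * sockc are assumed to have been prepared by the caller):
     *
     *    err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen,
     *                          sizeof(struct udphdr), &ipc6, &fl6,
     *                          (struct rt6_info *)dst, msg->msg_flags,
     *                          &sockc);
     *    if (err)
     *        ip6_flush_pending_frames(sk);
     *    else if (!(msg->msg_flags & MSG_MORE))
     *        err = ip6_push_pending_frames(sk);
     *
     * Data accumulates on sk->sk_write_queue across calls until the final
     * push hands the assembled skb to ip6_send_skb().
     */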
1691
1692static void ip6_cork_release(struct inet_cork_full *cork,
1693                             struct inet6_cork *v6_cork)
1694{
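            /*
             * ip6_setup_cork() duplicated the extension-header buffers
             * into the cork, so each sub-option is owned here and must be
             * freed individually.
             */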
1695        if (v6_cork->opt) {
1696                kfree(v6_cork->opt->dst0opt);
1697                kfree(v6_cork->opt->dst1opt);
1698                kfree(v6_cork->opt->hopopt);
1699                kfree(v6_cork->opt->srcrt);
1700                kfree(v6_cork->opt);
1701                v6_cork->opt = NULL;
1702        }
1703
1704        if (cork->base.dst) {
1705                dst_release(cork->base.dst);
1706                cork->base.dst = NULL;
1707                cork->base.flags &= ~IPCORK_ALLFRAG;
1708        }
1709        memset(&cork->fl, 0, sizeof(cork->fl));
1710}
1711
1712struct sk_buff *__ip6_make_skb(struct sock *sk,
1713                               struct sk_buff_head *queue,
1714                               struct inet_cork_full *cork,
1715                               struct inet6_cork *v6_cork)
1716{
1717        struct sk_buff *skb, *tmp_skb;
1718        struct sk_buff **tail_skb;
1719        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1720        struct ipv6_pinfo *np = inet6_sk(sk);
1721        struct net *net = sock_net(sk);
1722        struct ipv6hdr *hdr;
1723        struct ipv6_txoptions *opt = v6_cork->opt;
1724        struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1725        struct flowi6 *fl6 = &cork->fl.u.ip6;
1726        unsigned char proto = fl6->flowi6_proto;
1727
1728        skb = __skb_dequeue(queue);
1729        if (!skb)
1730                goto out;
1731        tail_skb = &(skb_shinfo(skb)->frag_list);
1732
1733        /* Move skb->data from the extension headers to the IP header. */
1734        if (skb->data < skb_network_header(skb))
1735                __skb_pull(skb, skb_network_offset(skb));
1736        while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1737                __skb_pull(tmp_skb, skb_network_header_len(skb));
1738                *tail_skb = tmp_skb;
1739                tail_skb = &(tmp_skb->next);
1740                skb->len += tmp_skb->len;
1741                skb->data_len += tmp_skb->len;
1742                skb->truesize += tmp_skb->truesize;
1743                tmp_skb->destructor = NULL;
1744                tmp_skb->sk = NULL;
1745        }
1746
1747        /* Allow local fragmentation. */
1748        skb->ignore_df = ip6_sk_ignore_df(sk);
1749
1750        *final_dst = fl6->daddr;
1751        __skb_pull(skb, skb_network_header_len(skb));
1752        if (opt && opt->opt_flen)
1753                ipv6_push_frag_opts(skb, opt, &proto);
1754        if (opt && opt->opt_nflen)
1755                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1756
1757        skb_push(skb, sizeof(struct ipv6hdr));
1758        skb_reset_network_header(skb);
1759        hdr = ipv6_hdr(skb);
1760
1761        ip6_flow_hdr(hdr, v6_cork->tclass,
1762                     ip6_make_flowlabel(net, skb, fl6->flowlabel,
1763                                        ip6_autoflowlabel(net, np), fl6));
1764        hdr->hop_limit = v6_cork->hop_limit;
1765        hdr->nexthdr = proto;
1766        hdr->saddr = fl6->saddr;
1767        hdr->daddr = *final_dst;
1768
1769        skb->priority = sk->sk_priority;
1770        skb->mark = sk->sk_mark;
1771
1772        skb->tstamp = cork->base.transmit_time;
1773
1774        skb_dst_set(skb, dst_clone(&rt->dst));
1775        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1776        if (proto == IPPROTO_ICMPV6) {
1777                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1778
1779                ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1780                ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1781        }
1782
1783        ip6_cork_release(cork, v6_cork);
1784out:
1785        return skb;
1786}
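
    /*
     * Note on the dequeue loop above: the pending queue collapses into a
     * single skb whose head buffer carries the headers, with every later
     * buffer chained on skb_shinfo(skb)->frag_list.  ip6_fragment() can
     * walk that frag_list directly, so each element typically becomes one
     * fragment on the wire.
     */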
1787
1788int ip6_send_skb(struct sk_buff *skb)
1789{
1790        struct net *net = sock_net(skb->sk);
1791        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1792        int err;
1793
1794        err = ip6_local_out(net, skb->sk, skb);
1795        if (err) {
1796                if (err > 0)
1797                        err = net_xmit_errno(err);
1798                if (err)
1799                        IP6_INC_STATS(net, rt->rt6i_idev,
1800                                      IPSTATS_MIB_OUTDISCARDS);
1801        }
1802
1803        return err;
1804}
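
    /*
     * ip6_local_out() may return positive NET_XMIT_* codes; net_xmit_errno()
     * maps NET_XMIT_CN (congestion notification) to 0 and any other positive
     * code to -ENOBUFS, so OUTDISCARDS is bumped only for real drops.
     */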
1805
1806int ip6_push_pending_frames(struct sock *sk)
1807{
1808        struct sk_buff *skb;
1809
1810        skb = ip6_finish_skb(sk);
1811        if (!skb)
1812                return 0;
1813
1814        return ip6_send_skb(skb);
1815}
1816EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1817
1818static void __ip6_flush_pending_frames(struct sock *sk,
1819                                       struct sk_buff_head *queue,
1820                                       struct inet_cork_full *cork,
1821                                       struct inet6_cork *v6_cork)
1822{
1823        struct sk_buff *skb;
1824
1825        while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1826                if (skb_dst(skb))
1827                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1828                                      IPSTATS_MIB_OUTDISCARDS);
1829                kfree_skb(skb);
1830        }
1831
1832        ip6_cork_release(cork, v6_cork);
1833}
1834
1835void ip6_flush_pending_frames(struct sock *sk)
1836{
1837        __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1838                                   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1839}
1840EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1841
1842struct sk_buff *ip6_make_skb(struct sock *sk,
1843                             int getfrag(void *from, char *to, int offset,
1844                                         int len, int odd, struct sk_buff *skb),
1845                             void *from, int length, int transhdrlen,
1846                             struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1847                             struct rt6_info *rt, unsigned int flags,
1848                             struct inet_cork_full *cork,
1849                             const struct sockcm_cookie *sockc)
1850{
1851        struct inet6_cork v6_cork;
1852        struct sk_buff_head queue;
1853        int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1854        int err;
1855
1856        if (flags & MSG_PROBE)
1857                return NULL;
1858
1859        __skb_queue_head_init(&queue);
1860
1861        cork->base.flags = 0;
1862        cork->base.addr = 0;
1863        cork->base.opt = NULL;
1864        cork->base.dst = NULL;
1865        v6_cork.opt = NULL;
1866        err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6, sockc);
1867        if (err) {
1868                ip6_cork_release(cork, &v6_cork);
1869                return ERR_PTR(err);
1870        }
1871        if (ipc6->dontfrag < 0)
1872                ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1873
1874        err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1875                                &current->task_frag, getfrag, from,
1876                                length + exthdrlen, transhdrlen + exthdrlen,
1877                                flags, ipc6, sockc);
1878        if (err) {
1879                __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1880                return ERR_PTR(err);
1881        }
1882
1883        return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1884}
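
    /*
     * A minimal usage sketch of this uncorked fast path, assuming the
     * caller has resolved a route and filled fl6, ipc6 and sockc, as
     * udpv6_sendmsg() does when corking is not in use:
     *
     *    struct inet_cork_full cork;
     *    struct sk_buff *skb;
     *
     *    skb = ip6_make_skb(sk, ip_generic_getfrag, msg, ulen,
     *                       sizeof(struct udphdr), &ipc6, &fl6,
     *                       (struct rt6_info *)dst, msg->msg_flags,
     *                       &cork, &sockc);
     *    if (!IS_ERR_OR_NULL(skb))
     *        err = ip6_send_skb(skb);
     *
     * The queue and cork live on the caller's stack, so nothing touches
     * sk->sk_write_queue and the send needs no socket lock.
     */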
1885