linux/net/ipv6/ip6_output.c
/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Changes:
 *      A.N.Kuznetsov   :       arithmetic in fragmentation.
 *                              extension headers are implemented.
 *                              route changes now work.
 *                              ip6_forward does not confuse sniffers.
 *                              etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *      Imran Patel     :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                      :       add ip6_append_data and related functions
 *                              for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

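/*
 * Final output step: resolve (or create) the neighbour entry for the
 * route's next hop and hand the packet to the link layer.  Multicast
 * packets are looped back to local listeners first when required
 * (sk_mc_loop), and node-local-scope multicast is never put on the
 * wire.
 */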
static int ip6_finish_output2(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        struct neighbour *neigh;
        struct in6_addr *nexthop;
        int ret;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
                    ((mroute6_socket(dev_net(dev), skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                         * is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(dev_net(dev), idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
                                 skb->len);

                if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
                    IPV6_ADDR_SCOPE_NODELOCAL &&
                    !(dev->flags & IFF_LOOPBACK)) {
                        kfree_skb(skb);
                        return 0;
                }
        }

        rcu_read_lock_bh();
        nexthop = rt6_nexthop((struct rt6_info *)dst);
        neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
        if (unlikely(!neigh))
                neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
        if (!IS_ERR(neigh)) {
                ret = dst_neigh_output(dst, neigh, skb);
                rcu_read_unlock_bh();
                return ret;
        }
        rcu_read_unlock_bh();

        IP6_INC_STATS_BH(dev_net(dst->dev),
                         ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

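/*
 * Fragment the packet if it exceeds the path MTU (and is not GSO) or
 * if the route requires fragmentation of every packet (dst_allfrag);
 * otherwise transmit it as-is.
 */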
static int ip6_finish_output(struct sk_buff *skb)
{
        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)))
                return ip6_fragment(skb, ip6_finish_output2);
        else
                return ip6_finish_output2(skb);
}

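/*
 * Entry point for locally generated packets after routing.  Packets are
 * dropped early when IPv6 is administratively disabled on the device;
 * otherwise they traverse the NF_INET_POST_ROUTING netfilter hook,
 * which is skipped for packets already rerouted there
 * (IP6SKB_REROUTED).
 */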
int ip6_output(struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(dev_net(dev), idev,
                              IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *      xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

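/*
 * Builds the IPv6 header (including any extension headers carried in
 * @opt), fills in hop limit, traffic class and flow label from the
 * socket/flow, and passes the packet through NF_INET_LOCAL_OUT.
 * Oversized, non-GSO packets are rejected with -EMSGSIZE and the error
 * is reported to the socket via ipv6_local_error().
 */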
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             struct ipv6_txoptions *opt, int tclass)
{
        struct net *net = sock_net(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr;
        u8  proto = fl6->flowi6_proto;
        int seg_len = skb->len;
        int hlimit = -1;
        u32 mtu;

        if (opt) {
                unsigned int head_room;

                /* First: exthdrs may take lots of space (~8K for now);
                 * MAX_HEADER is not enough.
                 */
                head_room = opt->opt_nflen + opt->opt_flen;
                seg_len += head_room;
                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

                if (skb_headroom(skb) < head_room) {
                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                        if (skb2 == NULL) {
                                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return -ENOBUFS;
                        }
                        consume_skb(skb);
                        skb = skb2;
                        skb_set_owner_w(skb, sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        ip6_flow_hdr(hdr, tclass, fl6->flowlabel);

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        hdr->saddr = fl6->saddr;
        hdr->daddr = *first_hop;

        skb->protocol = htons(ETH_P_IPV6);
        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                 IPSTATS_MIB_OUT, skb->len);
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
                               dst->dev, dst_output);
        }

        skb->dev = dst->dev;
        ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

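/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * registered on ip6_ra_chain with a matching selector (set via the
 * IPV6_ROUTER_ALERT socket option).  Returns 1 if the packet was
 * consumed, 0 otherwise.
 */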
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

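/*
 * Decide what to do with a packet whose destination is an address we
 * proxy (proxy NDP): 1 = hand it to local input (unicast neighbour
 * discovery), 0 = forward it, -1 = drop it (link-local destination).
 */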
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        __be16 frag_off;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* A unicast neighbour discovery message destined
                         * to the proxied address must be passed to the
                         * input function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
        return dst_output(skb);
}

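/*
 * Forwarding path.  Validates the packet (forwarding enabled, hop
 * limit, XFRM policy, source address sanity), handles the Router Alert
 * and proxy NDP special cases, emits ICMPv6 errors (hop limit
 * exceeded, packet too big) where required, then decrements the hop
 * limit and hands the packet to NF_INET_FORWARD.
 */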
int ip6_forward(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        skb_forward_csum(skb);

        /*
         *      We do NOT do any processing on RA packets; we push them
         *      to user level AS IS, with no guarantee that an
         *      application will be able to interpret them.  The reason
         *      is that we cannot do anything clever here.
         *
         *      We are not the end node, so if the packet contains
         *      AH/ESP we cannot do anything.
         *      Defragmentation would also be a mistake; RA packets
         *      must not be fragmented, because there is no guarantee
         *      that different fragments will follow the same path. --ANK
         */
        if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
                if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
                        return 0;
        }

        /*
         *      check and decrement hop limit
         */
        if (hdr->hop_limit <= 1) {
                /* Force the OUTPUT device to be used for the source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(dst),
                                      IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* The IPv6 specs say nothing about it, but it is clear that we
         * cannot send redirects for source-routed frames.
         * We also don't send redirects for frames decapsulated from IPsec.
         */
        if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct inet_peer *peer;
                struct rt6_info *rt;

                /*
                 *      The incoming and outgoing devices are the same;
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if (rt->rt6i_flags & RTF_GATEWAY)
                        target = &rt->rt6i_gateway;
                else
                        target = &hdr->daddr;

                peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

                /* Limit redirects both by destination (here)
                 * and by source (inside ndisc_send_redirect).
                 */
                if (inet_peer_xrlim_allow(peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
                if (peer)
                        inet_putpeer(peer);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = dst_mtu(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
            (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
                /* Again, force the OUTPUT device to be used for the
                 * source address.
                 */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Mangling the hop count is delayed to the point after skb COW */

        hdr->hop_limit--;

        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
        to->nf_trace = from->nf_trace;
#endif
        skb_copy_secmark(to, from);
}

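/*
 * Fragment an IPv6 packet for the path MTU and send each fragment via
 * @output.  If the skb carries a well-formed frag_list (every piece
 * fits the MTU, is a multiple of 8 bytes except the last, and has
 * enough headroom), the list is converted to fragments in place (fast
 * path); otherwise the slow path copies the payload into freshly
 * allocated skbs.  Per RFC 2460 all fragments except the last carry a
 * multiple of 8 octets of payload, so frag_off can store the byte
 * offset directly, leaving the low-order bits free for the IP6_MF
 * flag.
 */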
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
        int hroom, troom;
        __be32 frag_id = 0;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;
        struct net *net = dev_net(skb_dst(skb)->dev);

        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb was not generated by a local socket.
         */
        if (unlikely(!skb->local_df && skb->len > mtu) ||
                     (IP6CB(skb)->frag_max_size &&
                      IP6CB(skb)->frag_max_size > mtu)) {
                if (skb->sk && dst_allfrag(skb_dst(skb)))
                        sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

                skb->dev = skb_dst(skb)->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        mtu -= hlen + sizeof(struct frag_hdr);

        if (skb_has_frag_list(skb)) {
                int first_len = skb_pagelen(skb);
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < hlen)
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = 0;
                offset = 0;
                frag = skb_shinfo(skb)->frag_list;
                skb_frag_list_init(skb);
                /* BUILD HEADER */

                *prevhdr = NEXTHDR_FRAGMENT;
                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        return -ENOMEM;
                }

                __skb_pull(skb, hlen);
                fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
                __skb_push(skb, hlen);
                skb_reset_network_header(skb);
                memcpy(skb_network_header(skb), tmp_hdr, hlen);

                ipv6_select_ident(fh, rt);
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                frag_id = fh->identification;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->len = first_len;
                ipv6_hdr(skb)->payload_len = htons(first_len -
                                                   sizeof(struct ipv6hdr));

                dst_hold(&rt->dst);

                for (;;) {
                        /* Prepare the header of the next frame
                         * before the previous one goes down. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                skb_reset_transport_header(frag);
                                fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
                                __skb_push(frag, hlen);
                                skb_reset_network_header(frag);
                                memcpy(skb_network_header(frag), tmp_hdr,
                                       hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next != NULL)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                ipv6_hdr(frag)->payload_len =
                                                htons(frag->len -
                                                      sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        err = output(skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb->next = NULL;
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        ip6_rt_put(rt);
                        return 0;
                }

                while (frag) {
                        skb = frag->next;
                        kfree_skb(frag);
                        frag = skb;
                }

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                ip6_rt_put(rt);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
            skb_checksum_help(skb))
                goto fail;

        left = skb->len - hlen;         /* Space per frame */
        ptr = hlen;                     /* Where to start from */

        /*
         *      Fragment the datagram.
         */

        *prevhdr = NEXTHDR_FRAGMENT;
        hroom = LL_RESERVED_SPACE(rt->dst.dev);
        troom = rt->dst.dev->needed_tailroom;

        /*
         *      Keep copying data until we run out.
         */
        while (left > 0) {
                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet end
                 * then align the next start on an eight byte boundary.
                 */
                if (len < left) {
                        len &= ~7;
                }
                /*
                 *      Allocate buffer.
                 */

                if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                                      hroom + troom, GFP_ATOMIC)) == NULL) {
                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *      Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, hroom);
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                skb_reset_network_header(frag);
                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
                frag->transport_header = (frag->network_header + hlen +
                                          sizeof(struct frag_hdr));

                /*
                 *      Charge the memory for the fragment to any owner
                 *      it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 *      Copy the packet header into the new buffer.
                 */
                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

                /*
                 *      Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                if (!frag_id) {
                        ipv6_select_ident(fh, rt);
                        frag_id = fh->identification;
                } else
                        fh->identification = frag_id;

                /*
                 *      Copy a block of the IP datagram.
                 */
                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
                        BUG();
                left -= len;

                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                ipv6_hdr(frag)->payload_len = htons(frag->len -
                                                    sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 *      Put this fragment into the sending queue.
                 */
                err = output(frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        consume_skb(skb);
        return err;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

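/*
 * Helper for ip6_sk_dst_check(): returns true when the cached route
 * cannot be trusted for this flow, i.e. it is neither a host route
 * matching the flow's address nor validated by the socket's cached
 * peer address (daddr_cache/saddr_cache).
 */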
static inline int ip6_rt_check(const struct rt6key *rt_key,
                               const struct in6_addr *fl_addr,
                               const struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          const struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt;

        if (!dst)
                goto out;

        if (dst->ops->family != AF_INET6) {
                dst_release(dst);
                return NULL;
        }

        rt = (struct rt6_info *)dst;
        /* Yes, checking route validity in the not-connected case is not
         * very simple. Take into account that we do not support routing
         * by source, TOS, and MSG_DONTROUTE          --ANK (980726)
         *
         * 1. ip6_rt_check(): If the route was a host route, check that
         *    the cached destination is current. If it is a network
         *    route, we can still check its validity using a saved
         *    pointer to the last used address: daddr_cache. We do not
         *    want to save the whole address now (the main consumer of
         *    this service is tcp, which does not have this problem),
         *    so the last trick works only on connected sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
            (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

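/*
 * Common tail of the dst lookup paths: perform the route lookup if no
 * dst was supplied, pick a source address when the flow leaves it
 * unspecified, and (with optimistic DAD) fall back to the default
 * router's dst when the next hop's neighbour entry is not yet valid.
 */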
static int ip6_dst_lookup_tail(struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
{
        struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        struct neighbour *n;
        struct rt6_info *rt;
#endif
        int err;

        if (*dst == NULL)
                *dst = ip6_route_output(net, sk, fl6);

        if ((err = (*dst)->error))
                goto out_err_release;

        if (ipv6_addr_any(&fl6->saddr)) {
                struct rt6_info *rt = (struct rt6_info *) *dst;
                err = ip6_route_get_saddr(net, rt, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                if (err)
                        goto out_err_release;
        }

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * If the dst entry we've looked up has a neighbour entry that
         * is not in a valid state and the source address from the flow
         * is marked as OPTIMISTIC, we release the found dst entry and
         * replace it with the dst entry of the nexthop router instead.
         */
        rt = (struct rt6_info *) *dst;
        rcu_read_lock_bh();
        n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
        err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
        rcu_read_unlock_bh();

        if (err) {
                struct inet6_ifaddr *ifp;
                struct flowi6 fl_gw6;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw6);
                        if ((err = (*dst)->error))
                                goto out_err_release;
                }
        }
#endif

        return 0;

out_err_release:
        if (err == -ENETUNREACH)
                IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        dst_release(*dst);
        *dst = NULL;
        return err;
}

/**
 *      ip6_dst_lookup - perform route lookup on flow
 *      @sk: socket which provides route info
 *      @dst: pointer to dst_entry * for result
 *      @fl6: flow to lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *      @sk: socket which provides route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @can_sleep: we are in a sleepable context
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns a valid dst pointer on success, or a pointer-encoded
 *      error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst,
                                      bool can_sleep)
{
        struct dst_entry *dst = NULL;
        int err;

        err = ip6_dst_lookup_tail(sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;
        if (can_sleep)
                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *      @sk: socket which provides the dst cache and route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @can_sleep: we are in a sleepable context
 *
 *      This function performs a route lookup on the given flow with the
 *      possibility of using the cached route in the socket if it is valid.
 *      It will take the socket dst lock when operating on the dst cache.
 *      As a result, this function can only be used in process context.
 *
 *      It returns a valid dst pointer on success, or a pointer-encoded
 *      error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                         const struct in6_addr *final_dst,
                                         bool can_sleep)
{
        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
        int err;

        dst = ip6_sk_dst_check(sk, dst, fl6);

        err = ip6_dst_lookup_tail(sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;
        if (can_sleep)
                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

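/*
 * UFO path for ip6_append_data(): instead of fragmenting in software,
 * queue one large skb and let the device (NETIF_F_UFO) segment it.
 * gso_size is the per-fragment payload, rounded down to a multiple of
 * 8 as required for IPv6 fragments.
 */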
static inline int ip6_ufo_append_data(struct sock *sk,
                        int getfrag(void *from, char *to, int offset, int len,
                        int odd, struct sk_buff *skb),
                        void *from, int length, int hh_len, int fragheaderlen,
                        int transhdrlen, int mtu, unsigned int flags,
                        struct rt6_info *rt)
{
        struct sk_buff *skb;
        struct frag_hdr fhdr;
        int err;

        /* The network device supports UDP large send offload, so create
         * one single skb containing the complete UDP datagram.
         */
        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
                skb = sock_alloc_send_skb(sk,
                        hh_len + fragheaderlen + transhdrlen + 20,
                        (flags & MSG_DONTWAIT), &err);
                if (skb == NULL)
                        return err;

                /* reserve space for the hardware header */
                skb_reserve(skb, hh_len);

                /* create space for the UDP/IP header */
                skb_put(skb, fragheaderlen + transhdrlen);

                /* initialize the network header pointer */
                skb_reset_network_header(skb);

                /* initialize the protocol header pointer */
                skb->transport_header = skb->network_header + fragheaderlen;

                skb->protocol = htons(ETH_P_IPV6);
                skb->csum = 0;

                __skb_queue_tail(&sk->sk_write_queue, skb);
        } else if (skb_is_gso(skb)) {
                goto append;
        }

        skb->ip_summed = CHECKSUM_PARTIAL;
        /* Specify the length of each IPv6 datagram fragment.
         * It has to be a multiple of 8.
         */
        skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
                                     sizeof(struct frag_hdr)) & ~7;
        skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
        ipv6_select_ident(&fhdr, rt);
        skb_shinfo(skb)->ip6_frag_id = fhdr.identification;

append:
        return skb_append_datato_frags(sk, skb, getfrag, from,
                                       (length - transhdrlen));
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

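/*
 * Recompute the effective mtu/maxfraglen while appending: the first
 * fragment must reserve the route's header_len, while later fragments
 * may use the full device or path MTU (depending on whether PMTU
 * probing is enabled).  Not applied on DST_XFRM_TUNNEL routes.
 */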
static void ip6_append_data_mtu(unsigned int *mtu,
                                int *maxfraglen,
                                unsigned int fragheaderlen,
                                struct sk_buff *skb,
                                struct rt6_info *rt,
                                bool pmtuprobe)
{
        if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
                if (skb == NULL) {
                        /* first fragment, reserve header_len */
                        *mtu = *mtu - rt->dst.header_len;

                } else {
                        /*
                         * this fragment is not the first; the header
                         * space is regarded as data space.
                         */
                        *mtu = min(*mtu, pmtuprobe ?
                                   rt->dst.dev->mtu :
                                   dst_mtu(rt->dst.path));
                }
                *maxfraglen = ((*mtu - fragheaderlen) & ~7)
                              + fragheaderlen - sizeof(struct frag_hdr);
        }
}

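/*
 * Append data to the socket's pending queue, building packets of at
 * most maxfraglen so they can later be sent as IPv6 fragments.  The
 * first call on an empty write queue sets up the cork (duplicated
 * ipv6_txoptions, cached route, flow, hop limit, tclass, fragsize);
 * subsequent calls reuse it.  Typical callers (e.g. the UDPv6 and raw
 * socket sendmsg paths) invoke this repeatedly and then flush the
 * queue with ip6_push_pending_frames().
 */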
1098int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1099        int offset, int len, int odd, struct sk_buff *skb),
1100        void *from, int length, int transhdrlen,
1101        int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1102        struct rt6_info *rt, unsigned int flags, int dontfrag)
1103{
1104        struct inet_sock *inet = inet_sk(sk);
1105        struct ipv6_pinfo *np = inet6_sk(sk);
1106        struct inet_cork *cork;
1107        struct sk_buff *skb, *skb_prev = NULL;
1108        unsigned int maxfraglen, fragheaderlen, mtu;
1109        int exthdrlen;
1110        int dst_exthdrlen;
1111        int hh_len;
1112        int copy;
1113        int err;
1114        int offset = 0;
1115        __u8 tx_flags = 0;
1116
1117        if (flags&MSG_PROBE)
1118                return 0;
1119        cork = &inet->cork.base;
1120        if (skb_queue_empty(&sk->sk_write_queue)) {
1121                /*
1122                 * setup for corking
1123                 */
1124                if (opt) {
1125                        if (WARN_ON(np->cork.opt))
1126                                return -EINVAL;
1127
1128                        np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
1129                        if (unlikely(np->cork.opt == NULL))
1130                                return -ENOBUFS;
1131
1132                        np->cork.opt->tot_len = opt->tot_len;
1133                        np->cork.opt->opt_flen = opt->opt_flen;
1134                        np->cork.opt->opt_nflen = opt->opt_nflen;
1135
1136                        np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1137                                                            sk->sk_allocation);
1138                        if (opt->dst0opt && !np->cork.opt->dst0opt)
1139                                return -ENOBUFS;
1140
1141                        np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1142                                                            sk->sk_allocation);
1143                        if (opt->dst1opt && !np->cork.opt->dst1opt)
1144                                return -ENOBUFS;
1145
1146                        np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1147                                                           sk->sk_allocation);
1148                        if (opt->hopopt && !np->cork.opt->hopopt)
1149                                return -ENOBUFS;
1150
1151                        np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1152                                                            sk->sk_allocation);
1153                        if (opt->srcrt && !np->cork.opt->srcrt)
1154                                return -ENOBUFS;
1155
1156                        /* need source address above miyazawa*/
1157                }
1158                dst_hold(&rt->dst);
1159                cork->dst = &rt->dst;
1160                inet->cork.fl.u.ip6 = *fl6;
1161                np->cork.hop_limit = hlimit;
1162                np->cork.tclass = tclass;
1163                if (rt->dst.flags & DST_XFRM_TUNNEL)
1164                        mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1165                              rt->dst.dev->mtu : dst_mtu(&rt->dst);
1166                else
1167                        mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1168                              rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1169                if (np->frag_size < mtu) {
1170                        if (np->frag_size)
1171                                mtu = np->frag_size;
1172                }
1173                cork->fragsize = mtu;
1174                if (dst_allfrag(rt->dst.path))
1175                        cork->flags |= IPCORK_ALLFRAG;
1176                cork->length = 0;
1177                exthdrlen = (opt ? opt->opt_flen : 0);
1178                length += exthdrlen;
1179                transhdrlen += exthdrlen;
1180                dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1181        } else {
1182                rt = (struct rt6_info *)cork->dst;
1183                fl6 = &inet->cork.fl.u.ip6;
1184                opt = np->cork.opt;
1185                transhdrlen = 0;
1186                exthdrlen = 0;
1187                dst_exthdrlen = 0;
1188                mtu = cork->fragsize;
1189        }
1190
1191        hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1192
1193        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1194                        (opt ? opt->opt_nflen : 0);
1195        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1196
1197        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1198                if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1199                        ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
1200                        return -EMSGSIZE;
1201                }
1202        }
1203
1204        /* For UDP, check if TX timestamp is enabled */
1205        if (sk->sk_type == SOCK_DGRAM)
1206                sock_tx_timestamp(sk, &tx_flags);
1207
1208        /*
1209         * Let's try using as much space as possible.
1210         * Use MTU if total length of the message fits into the MTU.
1211         * Otherwise, we need to reserve fragment header and
1212         * fragment alignment (= 8-15 octects, in total).
1213         *
1214         * Note that we may need to "move" the data from the tail of
1215         * of the buffer to the new fragment when we split
1216         * the message.
1217         *
1218         * FIXME: It may be fragmented into multiple chunks
1219         *        at once if non-fragmentable extension headers
1220         *        are too large.
1221         * --yoshfuji
1222         */
1223
1224        if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
1225                                           sk->sk_protocol == IPPROTO_RAW)) {
1226                ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
1227                return -EMSGSIZE;
1228        }
1229
1230        skb = skb_peek_tail(&sk->sk_write_queue);
1231        cork->length += length;
1232        if (((length > mtu) ||
1233             (skb && skb_is_gso(skb))) &&
1234            (sk->sk_protocol == IPPROTO_UDP) &&
1235            (rt->dst.dev->features & NETIF_F_UFO)) {
1236                err = ip6_ufo_append_data(sk, getfrag, from, length,
1237                                          hh_len, fragheaderlen,
1238                                          transhdrlen, mtu, flags, rt);
1239                if (err)
1240                        goto error;
1241                return 0;
1242        }
1243
1244        if (!skb)
1245                goto alloc_new_skb;
1246
1247        while (length > 0) {
1248                /* Check if the remaining data fits into current packet. */
1249                copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1250                if (copy < length)
1251                        copy = maxfraglen - skb->len;
1252
1253                if (copy <= 0) {
1254                        char *data;
1255                        unsigned int datalen;
1256                        unsigned int fraglen;
1257                        unsigned int fraggap;
1258                        unsigned int alloclen;
1259alloc_new_skb:
1260                        /* There's no room in the current skb */
1261                        if (skb)
1262                                fraggap = skb->len - maxfraglen;
1263                        else
1264                                fraggap = 0;
1265                        /* update mtu and maxfraglen if necessary */
1266                        if (skb == NULL || skb_prev == NULL)
1267                                ip6_append_data_mtu(&mtu, &maxfraglen,
1268                                                    fragheaderlen, skb, rt,
1269                                                    np->pmtudisc ==
1270                                                    IPV6_PMTUDISC_PROBE);
1271
1272                        skb_prev = skb;
1273
1274                        /*
1275                         * If remaining data exceeds the mtu,
1276                         * we know we need more fragment(s).
1277                         */
1278                        datalen = length + fraggap;
1279
1280                        if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1281                                datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1282                        if ((flags & MSG_MORE) &&
1283                            !(rt->dst.dev->features&NETIF_F_SG))
1284                                alloclen = mtu;
1285                        else
1286                                alloclen = datalen + fragheaderlen;
1287
1288                        alloclen += dst_exthdrlen;
1289
1290                        if (datalen != length + fraggap) {
1291                                /*
1292                                 * this is not the last fragment, the trailer
1293                                 * space is regarded as data space.
1294                                 */
1295                                datalen += rt->dst.trailer_len;
1296                        }
1297
1298                        alloclen += rt->dst.trailer_len;
1299                        fraglen = datalen + fragheaderlen;
1300
1301                        /*
1302                         * We just reserve space for fragment header.
1303                         * Note: this may be overallocation if the message
1304                         * (without MSG_MORE) fits into the MTU.
1305                         */
1306                        alloclen += sizeof(struct frag_hdr);
1307
1308                        if (transhdrlen) {
1309                                skb = sock_alloc_send_skb(sk,
1310                                                alloclen + hh_len,
1311                                                (flags & MSG_DONTWAIT), &err);
1312                        } else {
1313                                skb = NULL;
1314                                if (atomic_read(&sk->sk_wmem_alloc) <=
1315                                    2 * sk->sk_sndbuf)
1316                                        skb = sock_wmalloc(sk,
1317                                                           alloclen + hh_len, 1,
1318                                                           sk->sk_allocation);
1319                                if (unlikely(skb == NULL))
1320                                        err = -ENOBUFS;
1321                                else {
1322                                        /* Only the initial fragment
1323                                         * is time stamped.
1324                                         */
1325                                        tx_flags = 0;
1326                                }
1327                        }
1328                        if (skb == NULL)
1329                                goto error;
1330                        /*
1331                         *      Fill in the control structures
1332                         */
1333                        skb->protocol = htons(ETH_P_IPV6);
1334                        skb->ip_summed = CHECKSUM_NONE;
1335                        skb->csum = 0;
1336                        /* reserve for fragmentation and ipsec header */
1337                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1338                                    dst_exthdrlen);
1339
1340                        if (sk->sk_type == SOCK_DGRAM)
1341                                skb_shinfo(skb)->tx_flags = tx_flags;
1342
1343                        /*
1344                         *      Find where to start putting bytes
1345                         */
1346                        data = skb_put(skb, fraglen);
1347                        skb_set_network_header(skb, exthdrlen);
1348                        data += fragheaderlen;
1349                        skb->transport_header = (skb->network_header +
1350                                                 fragheaderlen);
1351                        if (fraggap) {
1352                                skb->csum = skb_copy_and_csum_bits(
1353                                        skb_prev, maxfraglen,
1354                                        data + transhdrlen, fraggap, 0);
1355                                skb_prev->csum = csum_sub(skb_prev->csum,
1356                                                          skb->csum);
1357                                data += fraggap;
1358                                pskb_trim_unique(skb_prev, maxfraglen);
1359                        }
1360                        copy = datalen - transhdrlen - fraggap;
1361
1362                        if (copy < 0) {
1363                                err = -EINVAL;
1364                                kfree_skb(skb);
1365                                goto error;
1366                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1367                                err = -EFAULT;
1368                                kfree_skb(skb);
1369                                goto error;
1370                        }
1371
1372                        offset += copy;
1373                        length -= datalen - fraggap;
1374                        transhdrlen = 0;
1375                        exthdrlen = 0;
1376                        dst_exthdrlen = 0;
1377
1378                        /*
1379                         * Put the packet on the pending queue
1380                         */
1381                        __skb_queue_tail(&sk->sk_write_queue, skb);
1382                        continue;
1383                }
1384
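                    /*
                     * The current skb still has room: copy up to 'copy'
                     * bytes either into its linear area or, when the
                     * device supports scatter-gather, into shared page
                     * fragments.
                     */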
1385                if (copy > length)
1386                        copy = length;
1387
1388                if (!(rt->dst.dev->features & NETIF_F_SG)) {
1389                        unsigned int off;
1390
1391                        off = skb->len;
1392                        if (getfrag(from, skb_put(skb, copy),
1393                                                offset, copy, off, skb) < 0) {
1394                                __skb_trim(skb, off);
1395                                err = -EFAULT;
1396                                goto error;
1397                        }
1398                } else {
1399                        int i = skb_shinfo(skb)->nr_frags;
1400                        struct page_frag *pfrag = sk_page_frag(sk);
1401
1402                        err = -ENOMEM;
1403                        if (!sk_page_frag_refill(sk, pfrag))
1404                                goto error;
1405
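                            /*
                             * Either extend the last page fragment in place
                             * or start a new one, bounded by MAX_SKB_FRAGS.
                             */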
1406                        if (!skb_can_coalesce(skb, i, pfrag->page,
1407                                              pfrag->offset)) {
1408                                err = -EMSGSIZE;
1409                                if (i == MAX_SKB_FRAGS)
1410                                        goto error;
1411
1412                                __skb_fill_page_desc(skb, i, pfrag->page,
1413                                                     pfrag->offset, 0);
1414                                skb_shinfo(skb)->nr_frags = ++i;
1415                                get_page(pfrag->page);
1416                        }
1417                        copy = min_t(int, copy, pfrag->size - pfrag->offset);
1418                        if (getfrag(from,
1419                                    page_address(pfrag->page) + pfrag->offset,
1420                                    offset, copy, skb->len, skb) < 0)
1421                                goto error_efault;
1422
1423                        pfrag->offset += copy;
1424                        skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1425                        skb->len += copy;
1426                        skb->data_len += copy;
1427                        skb->truesize += copy;
1428                        atomic_add(copy, &sk->sk_wmem_alloc);
1429                }
1430                offset += copy;
1431                length -= copy;
1432        }
1433
1434        return 0;
1435
1436error_efault:
1437        err = -EFAULT;
1438error:
1439        cork->length -= length;
1440        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1441        return err;
1442}
1443EXPORT_SYMBOL_GPL(ip6_append_data);
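
    /*
     * Usage sketch (illustrative only, not part of this file): a
     * datagram protocol typically corks payload via ip6_append_data()
     * and then either pushes or flushes the queue, much as the UDPv6
     * sendmsg path does.  'getfrag', 'corkreq' and the size arguments
     * below are placeholders, not definitions from this file:
     *
     *	err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
     *			      sizeof(struct udphdr), hlimit, tclass,
     *			      opt, &fl6, rt, msg->msg_flags, dontfrag);
     *	if (err)
     *		ip6_flush_pending_frames(sk);
     *	else if (!corkreq)
     *		err = ip6_push_pending_frames(sk);
     */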
1444
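    /*
     * Release per-cork state: free any copied extension header
     * options, drop the cached route and clear the saved flow.
     */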
1445static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1446{
1447        if (np->cork.opt) {
1448                kfree(np->cork.opt->dst0opt);
1449                kfree(np->cork.opt->dst1opt);
1450                kfree(np->cork.opt->hopopt);
1451                kfree(np->cork.opt->srcrt);
1452                kfree(np->cork.opt);
1453                np->cork.opt = NULL;
1454        }
1455
1456        if (inet->cork.base.dst) {
1457                dst_release(inet->cork.base.dst);
1458                inet->cork.base.dst = NULL;
1459                inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1460        }
1461        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1462}
1463
1464int ip6_push_pending_frames(struct sock *sk)
1465{
1466        struct sk_buff *skb, *tmp_skb;
1467        struct sk_buff **tail_skb;
1468        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1469        struct inet_sock *inet = inet_sk(sk);
1470        struct ipv6_pinfo *np = inet6_sk(sk);
1471        struct net *net = sock_net(sk);
1472        struct ipv6hdr *hdr;
1473        struct ipv6_txoptions *opt = np->cork.opt;
1474        struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
1475        struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1476        unsigned char proto = fl6->flowi6_proto;
1477        int err = 0;
1478
1479        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1480                goto out;
1481        tail_skb = &(skb_shinfo(skb)->frag_list);
1482
1483        /* move skb->data from the ext header space up to the IP header */
1484        if (skb->data < skb_network_header(skb))
1485                __skb_pull(skb, skb_network_offset(skb));
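            /*
             * Chain the remaining queued skbs onto the first skb's
             * frag_list, so the whole corked message goes out as one
             * packet without copying the payload.
             */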
1486        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1487                __skb_pull(tmp_skb, skb_network_header_len(skb));
1488                *tail_skb = tmp_skb;
1489                tail_skb = &(tmp_skb->next);
1490                skb->len += tmp_skb->len;
1491                skb->data_len += tmp_skb->len;
1492                skb->truesize += tmp_skb->truesize;
1493                tmp_skb->destructor = NULL;
1494                tmp_skb->sk = NULL;
1495        }
1496
1497        /* Allow local fragmentation. */
1498        if (np->pmtudisc < IPV6_PMTUDISC_DO)
1499                skb->local_df = 1;
1500
1501        *final_dst = fl6->daddr;
1502        __skb_pull(skb, skb_network_header_len(skb));
1503        if (opt && opt->opt_flen)
1504                ipv6_push_frag_opts(skb, opt, &proto);
1505        if (opt && opt->opt_nflen)
1506                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1507
1508        skb_push(skb, sizeof(struct ipv6hdr));
1509        skb_reset_network_header(skb);
1510        hdr = ipv6_hdr(skb);
1511
1512        ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
1513        hdr->hop_limit = np->cork.hop_limit;
1514        hdr->nexthdr = proto;
1515        hdr->saddr = fl6->saddr;
1516        hdr->daddr = *final_dst;
1517
1518        skb->priority = sk->sk_priority;
1519        skb->mark = sk->sk_mark;
1520
1521        skb_dst_set(skb, dst_clone(&rt->dst));
1522        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1523        if (proto == IPPROTO_ICMPV6) {
1524                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1525
1526                ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1527                ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1528        }
1529
1530        err = ip6_local_out(skb);
1531        if (err) {
1532                if (err > 0)
1533                        err = net_xmit_errno(err);
1534                if (err)
1535                        goto error;
1536        }
1537
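            /* Success and error paths both release the cork state here. */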
1538out:
1539        ip6_cork_release(inet, np);
1540        return err;
1541error:
1542        IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1543        goto out;
1544}
1545EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1546
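    /*
     * Discard everything queued on the socket's cork without sending
     * it, counting each dropped packet as an output discard, then
     * release the cork state.
     */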
1547void ip6_flush_pending_frames(struct sock *sk)
1548{
1549        struct sk_buff *skb;
1550
1551        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1552                if (skb_dst(skb))
1553                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1554                                      IPSTATS_MIB_OUTDISCARDS);
1555                kfree_skb(skb);
1556        }
1557
1558        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1559}
1560EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1561