linux/net/ipv6/ip6_output.c
<<
>>
Prefs
   1/*
   2 *      IPv6 output functions
   3 *      Linux INET6 implementation
   4 *
   5 *      Authors:
   6 *      Pedro Roque             <roque@di.fc.ul.pt>
   7 *
   8 *      $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
   9 *
  10 *      Based on linux/net/ipv4/ip_output.c
  11 *
  12 *      This program is free software; you can redistribute it and/or
  13 *      modify it under the terms of the GNU General Public License
  14 *      as published by the Free Software Foundation; either version
  15 *      2 of the License, or (at your option) any later version.
  16 *
  17 *      Changes:
   18 *      A.N.Kuznetsov   :       arithmetic in fragmentation.
  19 *                              extension headers are implemented.
  20 *                              route changes now work.
  21 *                              ip6_forward does not confuse sniffers.
  22 *                              etc.
  23 *
  24 *      H. von Brand    :       Added missing #include <linux/string.h>
  25 *      Imran Patel     :       frag id should be in NBO
  26 *      Kazunori MIYAZAWA @USAGI
  27 *                      :       add ip6_append_data and related functions
  28 *                              for datagram xmit
  29 */
  30
  31#include <linux/errno.h>
  32#include <linux/types.h>
  33#include <linux/string.h>
  34#include <linux/socket.h>
  35#include <linux/net.h>
  36#include <linux/netdevice.h>
  37#include <linux/if_arp.h>
  38#include <linux/in6.h>
  39#include <linux/tcp.h>
  40#include <linux/route.h>
  41#include <linux/module.h>
  42
  43#include <linux/netfilter.h>
  44#include <linux/netfilter_ipv6.h>
  45
  46#include <net/sock.h>
  47#include <net/snmp.h>
  48
  49#include <net/ipv6.h>
  50#include <net/ndisc.h>
  51#include <net/protocol.h>
  52#include <net/ip6_route.h>
  53#include <net/addrconf.h>
  54#include <net/rawv6.h>
  55#include <net/icmp.h>
  56#include <net/xfrm.h>
  57#include <net/checksum.h>
  58
  59static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
  60
  61static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
  62{
  63        static u32 ipv6_fragmentation_id = 1;
  64        static DEFINE_SPINLOCK(ip6_id_lock);
  65
  66        spin_lock_bh(&ip6_id_lock);
  67        fhdr->identification = htonl(ipv6_fragmentation_id);
  68        if (++ipv6_fragmentation_id == 0)
  69                ipv6_fragmentation_id = 1;
  70        spin_unlock_bh(&ip6_id_lock);
  71}
  72
  73static int ip6_output_finish(struct sk_buff *skb)
  74{
  75        struct dst_entry *dst = skb->dst;
  76
  77        if (dst->hh)
  78                return neigh_hh_output(dst->hh, skb);
  79        else if (dst->neighbour)
  80                return dst->neighbour->output(skb);
  81
  82        IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
  83        kfree_skb(skb);
  84        return -EINVAL;
  85
  86}
  87
  88/* dev_loopback_xmit for use with netfilter. */
  89static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
  90{
  91        skb_reset_mac_header(newskb);
  92        __skb_pull(newskb, skb_network_offset(newskb));
  93        newskb->pkt_type = PACKET_LOOPBACK;
  94        newskb->ip_summed = CHECKSUM_UNNECESSARY;
  95        BUG_TRAP(newskb->dst);
  96
  97        netif_rx(newskb);
  98        return 0;
  99}
 100
 101
 102static int ip6_output2(struct sk_buff *skb)
 103{
 104        struct dst_entry *dst = skb->dst;
 105        struct net_device *dev = dst->dev;
 106
 107        skb->protocol = htons(ETH_P_IPV6);
 108        skb->dev = dev;
 109
 110        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
 111                struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
 112                struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 113
 114                if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
 115                    ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
 116                                        &ipv6_hdr(skb)->saddr)) {
 117                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 118
 119                        /* Do not check for IFF_ALLMULTI; multicast routing
 120                           is not supported in any case.
 121                         */
 122                        if (newskb)
 123                                NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
 124                                        newskb->dev,
 125                                        ip6_dev_loopback_xmit);
 126
 127                        if (ipv6_hdr(skb)->hop_limit == 0) {
 128                                IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
 129                                kfree_skb(skb);
 130                                return 0;
 131                        }
 132                }
 133
 134                IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
 135        }
 136
 137        return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
 138}
 139
 140static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
 141{
 142        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 143
 144        return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
 145               skb->dst->dev->mtu : dst_mtu(skb->dst);
 146}
 147
 148int ip6_output(struct sk_buff *skb)
 149{
 150        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 151                                dst_allfrag(skb->dst))
 152                return ip6_fragment(skb, ip6_output2);
 153        else
 154                return ip6_output2(skb);
 155}
 156
 157/*
 158 *      xmit an sk_buff (used by TCP)
 159 */
 160
 161int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 162             struct ipv6_txoptions *opt, int ipfragok)
 163{
 164        struct ipv6_pinfo *np = inet6_sk(sk);
 165        struct in6_addr *first_hop = &fl->fl6_dst;
 166        struct dst_entry *dst = skb->dst;
 167        struct ipv6hdr *hdr;
 168        u8  proto = fl->proto;
 169        int seg_len = skb->len;
 170        int hlimit, tclass;
 171        u32 mtu;
 172
 173        if (opt) {
 174                unsigned int head_room;
 175
 176                /* First: exthdrs may take lots of space (~8K for now)
 177                   MAX_HEADER is not enough.
 178                 */
 179                head_room = opt->opt_nflen + opt->opt_flen;
 180                seg_len += head_room;
 181                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
 182
 183                if (skb_headroom(skb) < head_room) {
 184                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 185                        if (skb2 == NULL) {
 186                                IP6_INC_STATS(ip6_dst_idev(skb->dst),
 187                                              IPSTATS_MIB_OUTDISCARDS);
 188                                kfree_skb(skb);
 189                                return -ENOBUFS;
 190                        }
 191                        kfree_skb(skb);
 192                        skb = skb2;
 193                        if (sk)
 194                                skb_set_owner_w(skb, sk);
 195                }
 196                if (opt->opt_flen)
 197                        ipv6_push_frag_opts(skb, opt, &proto);
 198                if (opt->opt_nflen)
 199                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
 200        }
 201
 202        skb_push(skb, sizeof(struct ipv6hdr));
 203        skb_reset_network_header(skb);
 204        hdr = ipv6_hdr(skb);
 205
 206        /*
 207         *      Fill in the IPv6 header
 208         */
 209
 210        hlimit = -1;
 211        if (np)
 212                hlimit = np->hop_limit;
 213        if (hlimit < 0)
 214                hlimit = dst_metric(dst, RTAX_HOPLIMIT);
 215        if (hlimit < 0)
 216                hlimit = ipv6_get_hoplimit(dst->dev);
 217
 218        tclass = -1;
 219        if (np)
 220                tclass = np->tclass;
 221        if (tclass < 0)
 222                tclass = 0;
 223
 224        *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
 225
 226        hdr->payload_len = htons(seg_len);
 227        hdr->nexthdr = proto;
 228        hdr->hop_limit = hlimit;
 229
 230        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
 231        ipv6_addr_copy(&hdr->daddr, first_hop);
 232
 233        skb->priority = sk->sk_priority;
 234
 235        mtu = dst_mtu(dst);
 236        if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
 237                IP6_INC_STATS(ip6_dst_idev(skb->dst),
 238                              IPSTATS_MIB_OUTREQUESTS);
 239                return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
 240                                dst_output);
 241        }
 242
 243        if (net_ratelimit())
 244                printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
 245        skb->dev = dst->dev;
 246        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 247        IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 248        kfree_skb(skb);
 249        return -EMSGSIZE;
 250}
 251
 252EXPORT_SYMBOL(ip6_xmit);
 253
 254/*
 255 *      To avoid extra problems ND packets are send through this
 256 *      routine. It's code duplication but I really want to avoid
 257 *      extra checks since ipv6_build_header is used by TCP (which
 258 *      is for us performance critical)
 259 */
 260
 261int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
 262               struct in6_addr *saddr, struct in6_addr *daddr,
 263               int proto, int len)
 264{
 265        struct ipv6_pinfo *np = inet6_sk(sk);
 266        struct ipv6hdr *hdr;
 267        int totlen;
 268
 269        skb->protocol = htons(ETH_P_IPV6);
 270        skb->dev = dev;
 271
 272        totlen = len + sizeof(struct ipv6hdr);
 273
 274        skb_reset_network_header(skb);
 275        skb_put(skb, sizeof(struct ipv6hdr));
 276        hdr = ipv6_hdr(skb);
 277
 278        *(__be32*)hdr = htonl(0x60000000);
 279
 280        hdr->payload_len = htons(len);
 281        hdr->nexthdr = proto;
 282        hdr->hop_limit = np->hop_limit;
 283
 284        ipv6_addr_copy(&hdr->saddr, saddr);
 285        ipv6_addr_copy(&hdr->daddr, daddr);
 286
 287        return 0;
 288}
 289
 290static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 291{
 292        struct ip6_ra_chain *ra;
 293        struct sock *last = NULL;
 294
 295        read_lock(&ip6_ra_lock);
 296        for (ra = ip6_ra_chain; ra; ra = ra->next) {
 297                struct sock *sk = ra->sk;
 298                if (sk && ra->sel == sel &&
 299                    (!sk->sk_bound_dev_if ||
 300                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
 301                        if (last) {
 302                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 303                                if (skb2)
 304                                        rawv6_rcv(last, skb2);
 305                        }
 306                        last = sk;
 307                }
 308        }
 309
 310        if (last) {
 311                rawv6_rcv(last, skb);
 312                read_unlock(&ip6_ra_lock);
 313                return 1;
 314        }
 315        read_unlock(&ip6_ra_lock);
 316        return 0;
 317}
 318
 319static int ip6_forward_proxy_check(struct sk_buff *skb)
 320{
 321        struct ipv6hdr *hdr = ipv6_hdr(skb);
 322        u8 nexthdr = hdr->nexthdr;
 323        int offset;
 324
 325        if (ipv6_ext_hdr(nexthdr)) {
 326                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
 327                if (offset < 0)
 328                        return 0;
 329        } else
 330                offset = sizeof(struct ipv6hdr);
 331
 332        if (nexthdr == IPPROTO_ICMPV6) {
 333                struct icmp6hdr *icmp6;
 334
 335                if (!pskb_may_pull(skb, (skb_network_header(skb) +
 336                                         offset + 1 - skb->data)))
 337                        return 0;
 338
 339                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 340
 341                switch (icmp6->icmp6_type) {
 342                case NDISC_ROUTER_SOLICITATION:
 343                case NDISC_ROUTER_ADVERTISEMENT:
 344                case NDISC_NEIGHBOUR_SOLICITATION:
 345                case NDISC_NEIGHBOUR_ADVERTISEMENT:
 346                case NDISC_REDIRECT:
 347                        /* For reaction involving unicast neighbor discovery
 348                         * message destined to the proxied address, pass it to
 349                         * input function.
 350                         */
 351                        return 1;
 352                default:
 353                        break;
 354                }
 355        }
 356
 357        /*
 358         * The proxying router can't forward traffic sent to a link-local
 359         * address, so signal the sender and discard the packet. This
 360         * behavior is clarified by the MIPv6 specification.
 361         */
 362        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
 363                dst_link_failure(skb);
 364                return -1;
 365        }
 366
 367        return 0;
 368}
 369
 370static inline int ip6_forward_finish(struct sk_buff *skb)
 371{
 372        return dst_output(skb);
 373}
 374
 375int ip6_forward(struct sk_buff *skb)
 376{
 377        struct dst_entry *dst = skb->dst;
 378        struct ipv6hdr *hdr = ipv6_hdr(skb);
 379        struct inet6_skb_parm *opt = IP6CB(skb);
 380
 381        if (ipv6_devconf.forwarding == 0)
 382                goto error;
 383
 384        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
 385                IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 386                goto drop;
 387        }
 388
 389        skb_forward_csum(skb);
 390
 391        /*
 392         *      We DO NOT make any processing on
 393         *      RA packets, pushing them to user level AS IS
 394         *      without ane WARRANTY that application will be able
 395         *      to interpret them. The reason is that we
 396         *      cannot make anything clever here.
 397         *
 398         *      We are not end-node, so that if packet contains
 399         *      AH/ESP, we cannot make anything.
 400         *      Defragmentation also would be mistake, RA packets
 401         *      cannot be fragmented, because there is no warranty
 402         *      that different fragments will go along one path. --ANK
 403         */
 404        if (opt->ra) {
 405                u8 *ptr = skb_network_header(skb) + opt->ra;
 406                if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
 407                        return 0;
 408        }
 409
 410        /*
 411         *      check and decrement ttl
 412         */
 413        if (hdr->hop_limit <= 1) {
 414                /* Force OUTPUT device used as source address */
 415                skb->dev = dst->dev;
 416                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
 417                            0, skb->dev);
 418                IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
 419
 420                kfree_skb(skb);
 421                return -ETIMEDOUT;
 422        }
 423
 424        /* XXX: idev->cnf.proxy_ndp? */
 425        if (ipv6_devconf.proxy_ndp &&
 426            pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
 427                int proxied = ip6_forward_proxy_check(skb);
 428                if (proxied > 0)
 429                        return ip6_input(skb);
 430                else if (proxied < 0) {
 431                        IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 432                        goto drop;
 433                }
 434        }
 435
 436        if (!xfrm6_route_forward(skb)) {
 437                IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 438                goto drop;
 439        }
 440        dst = skb->dst;
 441
 442        /* IPv6 specs say nothing about it, but it is clear that we cannot
 443           send redirects to source routed frames.
 444           We don't send redirects to frames decapsulated from IPsec.
 445         */
 446        if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
 447            !skb->sp) {
 448                struct in6_addr *target = NULL;
 449                struct rt6_info *rt;
 450                struct neighbour *n = dst->neighbour;
 451
 452                /*
 453                 *      incoming and outgoing devices are the same
 454                 *      send a redirect.
 455                 */
 456
 457                rt = (struct rt6_info *) dst;
 458                if ((rt->rt6i_flags & RTF_GATEWAY))
 459                        target = (struct in6_addr*)&n->primary_key;
 460                else
 461                        target = &hdr->daddr;
 462
 463                /* Limit redirects both by destination (here)
 464                   and by source (inside ndisc_send_redirect)
 465                 */
 466                if (xrlim_allow(dst, 1*HZ))
 467                        ndisc_send_redirect(skb, n, target);
 468        } else {
 469                int addrtype = ipv6_addr_type(&hdr->saddr);
 470
 471                /* This check is security critical. */
 472                if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
 473                        goto error;
 474                if (addrtype & IPV6_ADDR_LINKLOCAL) {
 475                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
 476                                ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
 477                        goto error;
 478                }
 479        }
 480
 481        if (skb->len > dst_mtu(dst)) {
 482                /* Again, force OUTPUT device used as source address */
 483                skb->dev = dst->dev;
 484                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
 485                IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
 486                IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
 487                kfree_skb(skb);
 488                return -EMSGSIZE;
 489        }
 490
 491        if (skb_cow(skb, dst->dev->hard_header_len)) {
 492                IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
 493                goto drop;
 494        }
 495
 496        hdr = ipv6_hdr(skb);
 497
 498        /* Mangling hops number delayed to point after skb COW */
 499
 500        hdr->hop_limit--;
 501
 502        IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 503        return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
 504
 505error:
 506        IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
 507drop:
 508        kfree_skb(skb);
 509        return -EINVAL;
 510}
 511
 512static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 513{
 514        to->pkt_type = from->pkt_type;
 515        to->priority = from->priority;
 516        to->protocol = from->protocol;
 517        dst_release(to->dst);
 518        to->dst = dst_clone(from->dst);
 519        to->dev = from->dev;
 520        to->mark = from->mark;
 521
 522#ifdef CONFIG_NET_SCHED
 523        to->tc_index = from->tc_index;
 524#endif
 525        nf_copy(to, from);
 526#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
 527    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
 528        to->nf_trace = from->nf_trace;
 529#endif
 530        skb_copy_secmark(to, from);
 531}
 532
 533int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 534{
 535        u16 offset = sizeof(struct ipv6hdr);
 536        struct ipv6_opt_hdr *exthdr =
 537                                (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
 538        unsigned int packet_len = skb->tail - skb->network_header;
 539        int found_rhdr = 0;
 540        *nexthdr = &ipv6_hdr(skb)->nexthdr;
 541
 542        while (offset + 1 <= packet_len) {
 543
 544                switch (**nexthdr) {
 545
 546                case NEXTHDR_HOP:
 547                        break;
 548                case NEXTHDR_ROUTING:
 549                        found_rhdr = 1;
 550                        break;
 551                case NEXTHDR_DEST:
 552#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 553                        if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
 554                                break;
 555#endif
 556                        if (found_rhdr)
 557                                return offset;
 558                        break;
 559                default :
 560                        return offset;
 561                }
 562
 563                offset += ipv6_optlen(exthdr);
 564                *nexthdr = &exthdr->nexthdr;
 565                exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
 566                                                 offset);
 567        }
 568
 569        return offset;
 570}
 571EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);
 572
 573static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 574{
 575        struct net_device *dev;
 576        struct sk_buff *frag;
 577        struct rt6_info *rt = (struct rt6_info*)skb->dst;
 578        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 579        struct ipv6hdr *tmp_hdr;
 580        struct frag_hdr *fh;
 581        unsigned int mtu, hlen, left, len;
 582        __be32 frag_id = 0;
 583        int ptr, offset = 0, err=0;
 584        u8 *prevhdr, nexthdr = 0;
 585
 586        dev = rt->u.dst.dev;
 587        hlen = ip6_find_1stfragopt(skb, &prevhdr);
 588        nexthdr = *prevhdr;
 589
 590        mtu = ip6_skb_dst_mtu(skb);
 591
 592        /* We must not fragment if the socket is set to force MTU discovery
 593         * or if the skb it not generated by a local socket.  (This last
 594         * check should be redundant, but it's free.)
 595         */
 596        if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
 597                skb->dev = skb->dst->dev;
 598                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 599                IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 600                kfree_skb(skb);
 601                return -EMSGSIZE;
 602        }
 603
 604        if (np && np->frag_size < mtu) {
 605                if (np->frag_size)
 606                        mtu = np->frag_size;
 607        }
 608        mtu -= hlen + sizeof(struct frag_hdr);
 609
 610        if (skb_shinfo(skb)->frag_list) {
 611                int first_len = skb_pagelen(skb);
 612
 613                if (first_len - hlen > mtu ||
 614                    ((first_len - hlen) & 7) ||
 615                    skb_cloned(skb))
 616                        goto slow_path;
 617
 618                for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
 619                        /* Correct geometry. */
 620                        if (frag->len > mtu ||
 621                            ((frag->len & 7) && frag->next) ||
 622                            skb_headroom(frag) < hlen)
 623                            goto slow_path;
 624
 625                        /* Partially cloned skb? */
 626                        if (skb_shared(frag))
 627                                goto slow_path;
 628
 629                        BUG_ON(frag->sk);
 630                        if (skb->sk) {
 631                                sock_hold(skb->sk);
 632                                frag->sk = skb->sk;
 633                                frag->destructor = sock_wfree;
 634                                skb->truesize -= frag->truesize;
 635                        }
 636                }
 637
 638                err = 0;
 639                offset = 0;
 640                frag = skb_shinfo(skb)->frag_list;
 641                skb_shinfo(skb)->frag_list = NULL;
 642                /* BUILD HEADER */
 643
 644                *prevhdr = NEXTHDR_FRAGMENT;
 645                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 646                if (!tmp_hdr) {
 647                        IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 648                        return -ENOMEM;
 649                }
 650
 651                __skb_pull(skb, hlen);
 652                fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
 653                __skb_push(skb, hlen);
 654                skb_reset_network_header(skb);
 655                memcpy(skb_network_header(skb), tmp_hdr, hlen);
 656
 657                ipv6_select_ident(skb, fh);
 658                fh->nexthdr = nexthdr;
 659                fh->reserved = 0;
 660                fh->frag_off = htons(IP6_MF);
 661                frag_id = fh->identification;
 662
 663                first_len = skb_pagelen(skb);
 664                skb->data_len = first_len - skb_headlen(skb);
 665                skb->len = first_len;
 666                ipv6_hdr(skb)->payload_len = htons(first_len -
 667                                                   sizeof(struct ipv6hdr));
 668
 669                dst_hold(&rt->u.dst);
 670
 671                for (;;) {
 672                        /* Prepare header of the next frame,
 673                         * before previous one went down. */
 674                        if (frag) {
 675                                frag->ip_summed = CHECKSUM_NONE;
 676                                skb_reset_transport_header(frag);
 677                                fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
 678                                __skb_push(frag, hlen);
 679                                skb_reset_network_header(frag);
 680                                memcpy(skb_network_header(frag), tmp_hdr,
 681                                       hlen);
 682                                offset += skb->len - hlen - sizeof(struct frag_hdr);
 683                                fh->nexthdr = nexthdr;
 684                                fh->reserved = 0;
 685                                fh->frag_off = htons(offset);
 686                                if (frag->next != NULL)
 687                                        fh->frag_off |= htons(IP6_MF);
 688                                fh->identification = frag_id;
 689                                ipv6_hdr(frag)->payload_len =
 690                                                htons(frag->len -
 691                                                      sizeof(struct ipv6hdr));
 692                                ip6_copy_metadata(frag, skb);
 693                        }
 694
 695                        err = output(skb);
 696                        if(!err)
 697                                IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
 698
 699                        if (err || !frag)
 700                                break;
 701
 702                        skb = frag;
 703                        frag = skb->next;
 704                        skb->next = NULL;
 705                }
 706
 707                kfree(tmp_hdr);
 708
 709                if (err == 0) {
 710                        IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
 711                        dst_release(&rt->u.dst);
 712                        return 0;
 713                }
 714
 715                while (frag) {
 716                        skb = frag->next;
 717                        kfree_skb(frag);
 718                        frag = skb;
 719                }
 720
 721                IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
 722                dst_release(&rt->u.dst);
 723                return err;
 724        }
 725
 726slow_path:
 727        left = skb->len - hlen;         /* Space per frame */
 728        ptr = hlen;                     /* Where to start from */
 729
 730        /*
 731         *      Fragment the datagram.
 732         */
 733
 734        *prevhdr = NEXTHDR_FRAGMENT;
 735
 736        /*
 737         *      Keep copying data until we run out.
 738         */
 739        while(left > 0) {
 740                len = left;
 741                /* IF: it doesn't fit, use 'mtu' - the data space left */
 742                if (len > mtu)
 743                        len = mtu;
 744                /* IF: we are not sending upto and including the packet end
 745                   then align the next start on an eight byte boundary */
 746                if (len < left) {
 747                        len &= ~7;
 748                }
 749                /*
 750                 *      Allocate buffer.
 751                 */
 752
 753                if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
 754                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
 755                        IP6_INC_STATS(ip6_dst_idev(skb->dst),
 756                                      IPSTATS_MIB_FRAGFAILS);
 757                        err = -ENOMEM;
 758                        goto fail;
 759                }
 760
 761                /*
 762                 *      Set up data on packet
 763                 */
 764
 765                ip6_copy_metadata(frag, skb);
 766                skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
 767                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 768                skb_reset_network_header(frag);
 769                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
 770                frag->transport_header = (frag->network_header + hlen +
 771                                          sizeof(struct frag_hdr));
 772
 773                /*
 774                 *      Charge the memory for the fragment to any owner
 775                 *      it might possess
 776                 */
 777                if (skb->sk)
 778                        skb_set_owner_w(frag, skb->sk);
 779
 780                /*
 781                 *      Copy the packet header into the new buffer.
 782                 */
 783                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 784
 785                /*
 786                 *      Build fragment header.
 787                 */
 788                fh->nexthdr = nexthdr;
 789                fh->reserved = 0;
 790                if (!frag_id) {
 791                        ipv6_select_ident(skb, fh);
 792                        frag_id = fh->identification;
 793                } else
 794                        fh->identification = frag_id;
 795
 796                /*
 797                 *      Copy a block of the IP datagram.
 798                 */
 799                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
 800                        BUG();
 801                left -= len;
 802
 803                fh->frag_off = htons(offset);
 804                if (left > 0)
 805                        fh->frag_off |= htons(IP6_MF);
 806                ipv6_hdr(frag)->payload_len = htons(frag->len -
 807                                                    sizeof(struct ipv6hdr));
 808
 809                ptr += len;
 810                offset += len;
 811
 812                /*
 813                 *      Put this fragment into the sending queue.
 814                 */
 815                err = output(frag);
 816                if (err)
 817                        goto fail;
 818
 819                IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
 820        }
 821        IP6_INC_STATS(ip6_dst_idev(skb->dst),
 822                      IPSTATS_MIB_FRAGOKS);
 823        kfree_skb(skb);
 824        return err;
 825
 826fail:
 827        IP6_INC_STATS(ip6_dst_idev(skb->dst),
 828                      IPSTATS_MIB_FRAGFAILS);
 829        kfree_skb(skb);
 830        return err;
 831}
 832
 833static inline int ip6_rt_check(struct rt6key *rt_key,
 834                               struct in6_addr *fl_addr,
 835                               struct in6_addr *addr_cache)
 836{
 837        return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 838                (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
 839}
 840
 841static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 842                                          struct dst_entry *dst,
 843                                          struct flowi *fl)
 844{
 845        struct ipv6_pinfo *np = inet6_sk(sk);
 846        struct rt6_info *rt = (struct rt6_info *)dst;
 847
 848        if (!dst)
 849                goto out;
 850
 851        /* Yes, checking route validity in not connected
 852         * case is not very simple. Take into account,
 853         * that we do not support routing by source, TOS,
 854         * and MSG_DONTROUTE            --ANK (980726)
 855         *
 856         * 1. ip6_rt_check(): If route was host route,
 857         *    check that cached destination is current.
 858         *    If it is network route, we still may
 859         *    check its validity using saved pointer
 860         *    to the last used address: daddr_cache.
 861         *    We do not want to save whole address now,
 862         *    (because main consumer of this service
 863         *    is tcp, which has not this problem),
 864         *    so that the last trick works only on connected
 865         *    sockets.
 866         * 2. oif also should be the same.
 867         */
 868        if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
 869#ifdef CONFIG_IPV6_SUBTREES
 870            ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
 871#endif
 872            (fl->oif && fl->oif != dst->dev->ifindex)) {
 873                dst_release(dst);
 874                dst = NULL;
 875        }
 876
 877out:
 878        return dst;
 879}
 880
 881static int ip6_dst_lookup_tail(struct sock *sk,
 882                               struct dst_entry **dst, struct flowi *fl)
 883{
 884        int err;
 885
 886        if (*dst == NULL)
 887                *dst = ip6_route_output(sk, fl);
 888
 889        if ((err = (*dst)->error))
 890                goto out_err_release;
 891
 892        if (ipv6_addr_any(&fl->fl6_src)) {
 893                err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
 894                if (err)
 895                        goto out_err_release;
 896        }
 897
 898#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 899                /*
 900                 * Here if the dst entry we've looked up
 901                 * has a neighbour entry that is in the INCOMPLETE
 902                 * state and the src address from the flow is
 903                 * marked as OPTIMISTIC, we release the found
 904                 * dst entry and replace it instead with the
 905                 * dst entry of the nexthop router
 906                 */
 907                if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
 908                        struct inet6_ifaddr *ifp;
 909                        struct flowi fl_gw;
 910                        int redirect;
 911
 912                        ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);
 913
 914                        redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
 915                        if (ifp)
 916                                in6_ifa_put(ifp);
 917
 918                        if (redirect) {
 919                                /*
 920                                 * We need to get the dst entry for the
 921                                 * default router instead
 922                                 */
 923                                dst_release(*dst);
 924                                memcpy(&fl_gw, fl, sizeof(struct flowi));
 925                                memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
 926                                *dst = ip6_route_output(sk, &fl_gw);
 927                                if ((err = (*dst)->error))
 928                                        goto out_err_release;
 929                        }
 930                }
 931#endif
 932
 933        return 0;
 934
 935out_err_release:
 936        if (err == -ENETUNREACH)
 937                IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
 938        dst_release(*dst);
 939        *dst = NULL;
 940        return err;
 941}
 942
 943/**
 944 *      ip6_dst_lookup - perform route lookup on flow
 945 *      @sk: socket which provides route info
 946 *      @dst: pointer to dst_entry * for result
 947 *      @fl: flow to lookup
 948 *
 949 *      This function performs a route lookup on the given flow.
 950 *
 951 *      It returns zero on success, or a standard errno code on error.
 952 */
 953int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
 954{
 955        *dst = NULL;
 956        return ip6_dst_lookup_tail(sk, dst, fl);
 957}
 958EXPORT_SYMBOL_GPL(ip6_dst_lookup);
 959
 960/**
 961 *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
 962 *      @sk: socket which provides the dst cache and route info
 963 *      @dst: pointer to dst_entry * for result
 964 *      @fl: flow to lookup
 965 *
 966 *      This function performs a route lookup on the given flow with the
 967 *      possibility of using the cached route in the socket if it is valid.
 968 *      It will take the socket dst lock when operating on the dst cache.
 969 *      As a result, this function can only be used in process context.
 970 *
 971 *      It returns zero on success, or a standard errno code on error.
 972 */
 973int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
 974{
 975        *dst = NULL;
 976        if (sk) {
 977                *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
 978                *dst = ip6_sk_dst_check(sk, *dst, fl);
 979        }
 980
 981        return ip6_dst_lookup_tail(sk, dst, fl);
 982}
 983EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
 984
 985static inline int ip6_ufo_append_data(struct sock *sk,
 986                        int getfrag(void *from, char *to, int offset, int len,
 987                        int odd, struct sk_buff *skb),
 988                        void *from, int length, int hh_len, int fragheaderlen,
 989                        int transhdrlen, int mtu,unsigned int flags)
 990
 991{
 992        struct sk_buff *skb;
 993        int err;
 994
 995        /* There is support for UDP large send offload by network
 996         * device, so create one single skb packet containing complete
 997         * udp datagram
 998         */
 999        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1000                skb = sock_alloc_send_skb(sk,
1001                        hh_len + fragheaderlen + transhdrlen + 20,
1002                        (flags & MSG_DONTWAIT), &err);
1003                if (skb == NULL)
1004                        return -ENOMEM;
1005
1006                /* reserve space for Hardware header */
1007                skb_reserve(skb, hh_len);
1008
1009                /* create space for UDP/IP header */
1010                skb_put(skb,fragheaderlen + transhdrlen);
1011
1012                /* initialize network header pointer */
1013                skb_reset_network_header(skb);
1014
1015                /* initialize protocol header pointer */
1016                skb->transport_header = skb->network_header + fragheaderlen;
1017
1018                skb->ip_summed = CHECKSUM_PARTIAL;
1019                skb->csum = 0;
1020                sk->sk_sndmsg_off = 0;
1021        }
1022
1023        err = skb_append_datato_frags(sk,skb, getfrag, from,
1024                                      (length - transhdrlen));
1025        if (!err) {
1026                struct frag_hdr fhdr;
1027
1028                /* specify the length of each IP datagram fragment*/
1029                skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1030                                            sizeof(struct frag_hdr);
1031                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1032                ipv6_select_ident(skb, &fhdr);
1033                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1034                __skb_queue_tail(&sk->sk_write_queue, skb);
1035
1036                return 0;
1037        }
1038        /* There is not enough support do UPD LSO,
1039         * so follow normal path
1040         */
1041        kfree_skb(skb);
1042
1043        return err;
1044}
1045
 /*
  * ip6_append_data - queue payload for a pending IPv6 datagram
  * @sk:          socket whose sk_write_queue collects the skbs
  * @getfrag:     copy callback, called as getfrag(from, to, offset, len,
  *               odd, skb); a negative return aborts with -EFAULT
  * @from:        opaque cookie handed to @getfrag
  * @length:      number of payload bytes to append
  * @transhdrlen: transport header length; forced to 0 when the write
  *               queue is not empty
  * @hlimit:      hop limit, stored in np->cork when the queue is empty
  * @tclass:      traffic class, stored in np->cork when the queue is empty
  * @opt:         tx options; copied into np->cork.opt when the queue is
  *               empty, replaced by np->cork.opt on later calls if
  *               IPCORK_OPT is set
  * @fl:          flow; copied into inet->cork.fl when the queue is empty,
  *               replaced by the corked copy on later calls
  * @rt:          route; held and stored in np->cork.rt when the queue is
  *               empty, replaced by the corked route on later calls
  * @flags:       MSG_* flags; MSG_PROBE, MSG_MORE and MSG_DONTWAIT are
  *               examined here
  *
  * Returns 0 on success (or immediately when MSG_PROBE is set) and a
  * negative errno otherwise.  Failures that reach the error label subtract
  * the not-yet-queued length from inet->cork.length and count an
  * IPSTATS_MIB_OUTDISCARDS.
  */
 1046int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 1047        int offset, int len, int odd, struct sk_buff *skb),
 1048        void *from, int length, int transhdrlen,
 1049        int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
 1050        struct rt6_info *rt, unsigned int flags)
 1051{
 1052        struct inet_sock *inet = inet_sk(sk);
 1053        struct ipv6_pinfo *np = inet6_sk(sk);
 1054        struct sk_buff *skb;
 1055        unsigned int maxfraglen, fragheaderlen;
 1056        int exthdrlen;
 1057        int hh_len;
 1058        int mtu;
 1059        int copy;
 1060        int err;
 1061        int offset = 0;
 1062        int csummode = CHECKSUM_NONE;
 1063
             /* MSG_PROBE: report success without queueing anything. */
 1064        if (flags&MSG_PROBE)
 1065                return 0;
 1066        if (skb_queue_empty(&sk->sk_write_queue)) {
 1067                /*
 1068                 * setup for corking
 1069                 */
 1070                if (opt) {
 1071                        if (np->cork.opt == NULL) {
 1072                                np->cork.opt = kmalloc(opt->tot_len,
 1073                                                       sk->sk_allocation);
 1074                                if (unlikely(np->cork.opt == NULL))
 1075                                        return -ENOBUFS;
 1076                        } else if (np->cork.opt->tot_len < opt->tot_len) {
                                     /* an existing buffer is reused only if it is big enough */
 1077                                printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
 1078                                return -EINVAL;
 1079                        }
 1080                        memcpy(np->cork.opt, opt, opt->tot_len);
 1081                        inet->cork.flags |= IPCORK_OPT;
 1082                        /* need source address above miyazawa*/
 1083                }
 1084                dst_hold(&rt->u.dst);
 1085                np->cork.rt = rt;
 1086                inet->cork.fl = *fl;
 1087                np->cork.hop_limit = hlimit;
 1088                np->cork.tclass = tclass;
                     /*
                      * With IPV6_PMTUDISC_PROBE use the device MTU, otherwise
                      * the MTU of the dst path; a non-zero np->frag_size that
                      * is smaller than that value overrides it.
                      */
 1089                mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
 1090                      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
 1091                if (np->frag_size < mtu) {
 1092                        if (np->frag_size)
 1093                                mtu = np->frag_size;
 1094                }
 1095                inet->cork.fragsize = mtu;
 1096                if (dst_allfrag(rt->u.dst.path))
 1097                        inet->cork.flags |= IPCORK_ALLFRAG;
 1098                inet->cork.length = 0;
 1099                sk->sk_sndmsg_page = NULL;
 1100                sk->sk_sndmsg_off = 0;
                     /*
                      * Extension headers are accounted on this first call only:
                      * they are added to both the length to queue and the
                      * transport header length.
                      */
 1101                exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
 1102                length += exthdrlen;
 1103                transhdrlen += exthdrlen;
 1104        } else {
                     /* queue not empty: continue with the state saved in the cork */
 1105                rt = np->cork.rt;
 1106                fl = &inet->cork.fl;
 1107                if (inet->cork.flags & IPCORK_OPT)
 1108                        opt = np->cork.opt;
 1109                transhdrlen = 0;
 1110                exthdrlen = 0;
 1111                mtu = inet->cork.fragsize;
 1112        }
 1113
 1114        hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
 1115
             /*
              * fragheaderlen: IPv6 header plus the non-fragmentable headers.
              * maxfraglen: fragheaderlen plus the largest 8-byte-aligned data
              * size that fits in the mtu, minus the size of a fragment header.
              */
 1116        fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0);
 1117        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
 1118
             /* Refuse a corked total that would exceed the maximum payload length. */
 1119        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
 1120                if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
 1121                        ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
 1122                        return -EMSGSIZE;
 1123                }
 1124        }
 1125
 1126        /*
 1127         * Let's try using as much space as possible.
 1128         * Use MTU if total length of the message fits into the MTU.
 1129         * Otherwise, we need to reserve fragment header and
 1130         * fragment alignment (= 8-15 octets, in total).
 1131         *
 1132         * Note that we may need to "move" the data from the tail
 1133         * of the buffer to the new fragment when we split
 1134         * the message.
 1135         *
 1136         * FIXME: It may be fragmented into multiple chunks
 1137         *        at once if non-fragmentable extension headers
 1138         *        are too large.
 1139         * --yoshfuji
 1140         */
 1141
 1142        inet->cork.length += length;
             /* Oversized UDP on a UFO-capable device: build one large skb instead. */
 1143        if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
 1144            (rt->u.dst.dev->features & NETIF_F_UFO)) {
 1145
 1146                err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
 1147                                          fragheaderlen, transhdrlen, mtu,
 1148                                          flags);
 1149                if (err)
 1150                        goto error;
 1151                return 0;
 1152        }
 1153
             /* With no tail skb, jump straight into the allocation code inside the loop. */
 1154        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
 1155                goto alloc_new_skb;
 1156
 1157        while (length > 0) {
 1158                /* Check if the remaining data fits into current packet. */
 1159                copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
 1160                if (copy < length)
 1161                        copy = maxfraglen - skb->len;
 1162
 1163                if (copy <= 0) {
 1164                        char *data;
 1165                        unsigned int datalen;
 1166                        unsigned int fraglen;
 1167                        unsigned int fraggap;
 1168                        unsigned int alloclen;
 1169                        struct sk_buff *skb_prev;
 1170alloc_new_skb:
 1171                        skb_prev = skb;
 1172
 1173                        /* There's no room in the current skb */
                             /* fraggap: bytes of the previous skb beyond maxfraglen; they are moved into the new skb below */
 1174                        if (skb_prev)
 1175                                fraggap = skb_prev->len - maxfraglen;
 1176                        else
 1177                                fraggap = 0;
 1178
 1179                        /*
 1180                         * If remaining data exceeds the mtu,
 1181                         * we know we need more fragment(s).
 1182                         */
 1183                        datalen = length + fraggap;
 1184                        if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
 1185                                datalen = maxfraglen - fragheaderlen;
 1186
 1187                        fraglen = datalen + fragheaderlen;
                             /* more data announced and no scatter-gather: allocate a full mtu so later calls can append in place */
 1188                        if ((flags & MSG_MORE) &&
 1189                            !(rt->u.dst.dev->features&NETIF_F_SG))
 1190                                alloclen = mtu;
 1191                        else
 1192                                alloclen = datalen + fragheaderlen;
 1193
 1194                        /*
 1195                         * The last fragment gets additional space at tail.
 1196                         * Note: we overallocate on fragments with MSG_MORE
 1197                         * because we have no idea if we're the last one.
 1198                         */
 1199                        if (datalen == length + fraggap)
 1200                                alloclen += rt->u.dst.trailer_len;
 1201
 1202                        /*
 1203                         * We just reserve space for fragment header.
 1204                         * Note: this may be overallocation if the message
 1205                         * (without MSG_MORE) fits into the MTU.
 1206                         */
 1207                        alloclen += sizeof(struct frag_hdr);
 1208
                             /*
                              * transhdrlen is non-zero only until the first skb of
                              * this call has been built (it is cleared below), and
                              * only when the queue was empty on entry.  That skb is
                              * allocated with sock_alloc_send_skb(); any other one
                              * uses sock_wmalloc() and only while sk_wmem_alloc is
                              * at most twice sk_sndbuf.
                              */
 1209                        if (transhdrlen) {
 1210                                skb = sock_alloc_send_skb(sk,
 1211                                                alloclen + hh_len,
 1212                                                (flags & MSG_DONTWAIT), &err);
 1213                        } else {
 1214                                skb = NULL;
 1215                                if (atomic_read(&sk->sk_wmem_alloc) <=
 1216                                    2 * sk->sk_sndbuf)
 1217                                        skb = sock_wmalloc(sk,
 1218                                                           alloclen + hh_len, 1,
 1219                                                           sk->sk_allocation);
 1220                                if (unlikely(skb == NULL))
 1221                                        err = -ENOBUFS;
 1222                        }
 1223                        if (skb == NULL)
 1224                                goto error;
 1225                        /*
 1226                         *      Fill in the control structures
 1227                         */
 1228                        skb->ip_summed = csummode;
 1229                        skb->csum = 0;
 1230                        /* reserve for fragmentation */
 1231                        skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
 1232
 1233                        /*
 1234                         *      Find where to start putting bytes
 1235                         */
 1236                        data = skb_put(skb, fraglen);
 1237                        skb_set_network_header(skb, exthdrlen);
 1238                        data += fragheaderlen;
 1239                        skb->transport_header = (skb->network_header +
 1240                                                 fragheaderlen);
                             /* move the overhang of the previous skb here, adjust both checksums, then trim the previous skb */
 1241                        if (fraggap) {
 1242                                skb->csum = skb_copy_and_csum_bits(
 1243                                        skb_prev, maxfraglen,
 1244                                        data + transhdrlen, fraggap, 0);
 1245                                skb_prev->csum = csum_sub(skb_prev->csum,
 1246                                                          skb->csum);
 1247                                data += fraggap;
 1248                                pskb_trim_unique(skb_prev, maxfraglen);
 1249                        }
                             /* what is left of datalen after headers and the moved bytes is new payload */
 1250                        copy = datalen - transhdrlen - fraggap;
 1251                        if (copy < 0) {
 1252                                err = -EINVAL;
 1253                                kfree_skb(skb);
 1254                                goto error;
 1255                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
 1256                                err = -EFAULT;
 1257                                kfree_skb(skb);
 1258                                goto error;
 1259                        }
 1260
 1261                        offset += copy;
 1262                        length -= datalen - fraggap;
                             /* headers belong to the first skb only */
 1263                        transhdrlen = 0;
 1264                        exthdrlen = 0;
 1265                        csummode = CHECKSUM_NONE;
 1266
 1267                        /*
 1268                         * Put the packet on the pending queue
 1269                         */
 1270                        __skb_queue_tail(&sk->sk_write_queue, skb);
 1271                        continue;
 1272                }
 1273
 1274                if (copy > length)
 1275                        copy = length;
 1276
 1277                if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
                             /* no scatter-gather: copy into the linear area, undoing the skb_put() on failure */
 1278                        unsigned int off;
 1279
 1280                        off = skb->len;
 1281                        if (getfrag(from, skb_put(skb, copy),
 1282                                                offset, copy, off, skb) < 0) {
 1283                                __skb_trim(skb, off);
 1284                                err = -EFAULT;
 1285                                goto error;
 1286                        }
 1287                } else {
                             /*
                              * Scatter-gather: append into page fragments, reusing
                              * sk->sk_sndmsg_page while it has room.
                              * NOTE(review): with nr_frags == 0, frag is formed as
                              * &frags[-1] and frag->page is read when
                              * sk_sndmsg_page is set - confirm this is intended.
                              */
 1288                        int i = skb_shinfo(skb)->nr_frags;
 1289                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
 1290                        struct page *page = sk->sk_sndmsg_page;
 1291                        int off = sk->sk_sndmsg_off;
 1292                        unsigned int left;
 1293
 1294                        if (page && (left = PAGE_SIZE - off) > 0) {
 1295                                if (copy >= left)
 1296                                        copy = left;
                                     /* the remembered page is not the last frag of this skb: add it as a new frag */
 1297                                if (page != frag->page) {
 1298                                        if (i == MAX_SKB_FRAGS) {
 1299                                                err = -EMSGSIZE;
 1300                                                goto error;
 1301                                        }
 1302                                        get_page(page);
 1303                                        skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
 1304                                        frag = &skb_shinfo(skb)->frags[i];
 1305                                }
 1306                        } else if(i < MAX_SKB_FRAGS) {
                                     /* no usable page remembered: allocate one and remember it in the socket */
 1307                                if (copy > PAGE_SIZE)
 1308                                        copy = PAGE_SIZE;
 1309                                page = alloc_pages(sk->sk_allocation, 0);
 1310                                if (page == NULL) {
 1311                                        err = -ENOMEM;
 1312                                        goto error;
 1313                                }
 1314                                sk->sk_sndmsg_page = page;
 1315                                sk->sk_sndmsg_off = 0;
 1316
 1317                                skb_fill_page_desc(skb, i, page, 0, 0);
 1318                                frag = &skb_shinfo(skb)->frags[i];
 1319                        } else {
 1320                                err = -EMSGSIZE;
 1321                                goto error;
 1322                        }
 1323                        if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
 1324                                err = -EFAULT;
 1325                                goto error;
 1326                        }
                             /* account the copied bytes in the page cursor, the frag, the skb and the socket */
 1327                        sk->sk_sndmsg_off += copy;
 1328                        frag->size += copy;
 1329                        skb->len += copy;
 1330                        skb->data_len += copy;
 1331                        skb->truesize += copy;
 1332                        atomic_add(copy, &sk->sk_wmem_alloc);
 1333                }
 1334                offset += copy;
 1335                length -= copy;
 1336        }
 1337        return 0;
 1338error:
             /* take back the part of this call's length that was not queued */
 1339        inet->cork.length -= length;
 1340        IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
 1341        return err;
 1342}
1343
1344static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1345{
1346        inet->cork.flags &= ~IPCORK_OPT;
1347        kfree(np->cork.opt);
1348        np->cork.opt = NULL;
1349        if (np->cork.rt) {
1350                dst_release(&np->cork.rt->u.dst);
1351                np->cork.rt = NULL;
1352                inet->cork.flags &= ~IPCORK_ALLFRAG;
1353        }
1354        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1355}
1356
1357int ip6_push_pending_frames(struct sock *sk)
1358{
1359        struct sk_buff *skb, *tmp_skb;
1360        struct sk_buff **tail_skb;
1361        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1362        struct inet_sock *inet = inet_sk(sk);
1363        struct ipv6_pinfo *np = inet6_sk(sk);
1364        struct ipv6hdr *hdr;
1365        struct ipv6_txoptions *opt = np->cork.opt;
1366        struct rt6_info *rt = np->cork.rt;
1367        struct flowi *fl = &inet->cork.fl;
1368        unsigned char proto = fl->proto;
1369        int err = 0;
1370
1371        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1372                goto out;
1373        tail_skb = &(skb_shinfo(skb)->frag_list);
1374
1375        /* move skb->data to ip header from ext header */
1376        if (skb->data < skb_network_header(skb))
1377                __skb_pull(skb, skb_network_offset(skb));
1378        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1379                __skb_pull(tmp_skb, skb_network_header_len(skb));
1380                *tail_skb = tmp_skb;
1381                tail_skb = &(tmp_skb->next);
1382                skb->len += tmp_skb->len;
1383                skb->data_len += tmp_skb->len;
1384                skb->truesize += tmp_skb->truesize;
1385                __sock_put(tmp_skb->sk);
1386                tmp_skb->destructor = NULL;
1387                tmp_skb->sk = NULL;
1388        }
1389
1390        ipv6_addr_copy(final_dst, &fl->fl6_dst);
1391        __skb_pull(skb, skb_network_header_len(skb));
1392        if (opt && opt->opt_flen)
1393                ipv6_push_frag_opts(skb, opt, &proto);
1394        if (opt && opt->opt_nflen)
1395                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1396
1397        skb_push(skb, sizeof(struct ipv6hdr));
1398        skb_reset_network_header(skb);
1399        hdr = ipv6_hdr(skb);
1400
1401        *(__be32*)hdr = fl->fl6_flowlabel |
1402                     htonl(0x60000000 | ((int)np->cork.tclass << 20));
1403
1404        if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1405                hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1406        else
1407                hdr->payload_len = 0;
1408        hdr->hop_limit = np->cork.hop_limit;
1409        hdr->nexthdr = proto;
1410        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1411        ipv6_addr_copy(&hdr->daddr, final_dst);
1412
1413        skb->priority = sk->sk_priority;
1414
1415        skb->dst = dst_clone(&rt->u.dst);
1416        IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1417        if (proto == IPPROTO_ICMPV6) {
1418                struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1419
1420                ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
1421                ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1422        }
1423
1424        err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
1425        if (err) {
1426                if (err > 0)
1427                        err = np->recverr ? net_xmit_errno(err) : 0;
1428                if (err)
1429                        goto error;
1430        }
1431
1432out:
1433        ip6_cork_release(inet, np);
1434        return err;
1435error:
1436        goto out;
1437}
1438
1439void ip6_flush_pending_frames(struct sock *sk)
1440{
1441        struct sk_buff *skb;
1442
1443        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1444                if (skb->dst)
1445                        IP6_INC_STATS(ip6_dst_idev(skb->dst),
1446                                      IPSTATS_MIB_OUTDISCARDS);
1447                kfree_skb(skb);
1448        }
1449
1450        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1451}
1452