/* linux/net/ipv6/ip6_output.c */
   1/*
   2 *      IPv6 output functions
   3 *      Linux INET6 implementation
   4 *
   5 *      Authors:
   6 *      Pedro Roque             <roque@di.fc.ul.pt>
   7 *
   8 *      Based on linux/net/ipv4/ip_output.c
   9 *
  10 *      This program is free software; you can redistribute it and/or
  11 *      modify it under the terms of the GNU General Public License
  12 *      as published by the Free Software Foundation; either version
  13 *      2 of the License, or (at your option) any later version.
  14 *
  15 *      Changes:
 *      A.N.Kuznetsov   :       arithmetics in fragmentation.
  17 *                              extension headers are implemented.
  18 *                              route changes now work.
  19 *                              ip6_forward does not confuse sniffers.
  20 *                              etc.
  21 *
  22 *      H. von Brand    :       Added missing #include <linux/string.h>
  23 *      Imran Patel     :       frag id should be in NBO
  24 *      Kazunori MIYAZAWA @USAGI
  25 *                      :       add ip6_append_data and related functions
  26 *                              for datagram xmit
  27 */
  28
  29#include <linux/errno.h>
  30#include <linux/kernel.h>
  31#include <linux/string.h>
  32#include <linux/socket.h>
  33#include <linux/net.h>
  34#include <linux/netdevice.h>
  35#include <linux/if_arp.h>
  36#include <linux/in6.h>
  37#include <linux/tcp.h>
  38#include <linux/route.h>
  39#include <linux/module.h>
  40#include <linux/slab.h>
  41
  42#include <linux/netfilter.h>
  43#include <linux/netfilter_ipv6.h>
  44
  45#include <net/sock.h>
  46#include <net/snmp.h>
  47
  48#include <net/ipv6.h>
  49#include <net/ndisc.h>
  50#include <net/protocol.h>
  51#include <net/ip6_route.h>
  52#include <net/addrconf.h>
  53#include <net/rawv6.h>
  54#include <net/icmp.h>
  55#include <net/xfrm.h>
  56#include <net/checksum.h>
  57#include <linux/mroute6.h>
  58
  59int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
  60
/*
 * Finalise the IPv6 header of a locally generated packet and run it
 * through the LOCAL_OUT netfilter hook.
 *
 * Returns the hook verdict: 1 means the packet was accepted and the
 * caller must invoke dst_output() itself (see ip6_local_out()).
 */
int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	/* Payloads larger than IPV6_MAXPLEN cannot be represented in the
	 * 16-bit payload_len field; 0 is the value used for jumbograms. */
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
}
  73
  74int ip6_local_out(struct sk_buff *skb)
  75{
  76        int err;
  77
  78        err = __ip6_local_out(skb);
  79        if (likely(err == 1))
  80                err = dst_output(skb);
  81
  82        return err;
  83}
  84EXPORT_SYMBOL_GPL(ip6_local_out);
  85
/* dev_loopback_xmit for use with netfilter: feeds a multicast clone
 * back into the local receive path so local listeners see it. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	/* A looped-back packet needs no checksum verification. */
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(newskb));

	/* Hand to the receive path; _ni variant is safe in process context. */
	netif_rx_ni(newskb);
	return 0;
}
  98
/*
 * Last step of the output path: resolve the route's neighbour entry and
 * hand the packet to the device layer.  Multicast packets may first be
 * looped back to local listeners.
 */
static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back when the socket asked for multicast
		 * loopback and either the mroute socket wants the packet
		 * (and it was not already forwarded) or a local process
		 * has joined the destination group on this device. */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			/* hop_limit 0: the sender wanted local delivery
			 * only, so the original must not leave the node. */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				skb->len);
	}

	/* The neighbour pointer is RCU protected. */
	rcu_read_lock();
	neigh = dst_get_neighbour(dst);
	if (neigh) {
		int res = neigh_output(neigh, skb);

		rcu_read_unlock();
		return res;
	}
	rcu_read_unlock();
	/* No neighbour entry: nowhere to send the packet. */
	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
 152
 153static int ip6_finish_output(struct sk_buff *skb)
 154{
 155        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 156            dst_allfrag(skb_dst(skb)))
 157                return ip6_fragment(skb, ip6_finish_output2);
 158        else
 159                return ip6_finish_output2(skb);
 160}
 161
/*
 * Entry point of the IPv6 output path (installed as dst->output).
 * Discards the packet when IPv6 is administratively disabled on the
 * output device; otherwise runs the POST_ROUTING netfilter hook
 * (skipped for packets flagged as rerouted) before ip6_finish_output().
 */
int ip6_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
 177
 178/*
 179 *      xmit an sk_buff (used by TCP, SCTP and DCCP)
 180 */
 181
/*
 * ip6_xmit - build the IPv6 header and transmit a packet for a socket
 * @sk:     owning socket (supplies hop limit, priority and mark)
 * @skb:    buffer holding the transport payload
 * @fl6:    flow with source/destination addresses and flow label
 * @opt:    optional IPv6 extension headers to insert, may be NULL
 * @tclass: traffic class for the header
 *
 * Consumes @skb in all cases.  Returns the netfilter verdict on
 * success, -ENOBUFS when headroom reallocation fails, -EMSGSIZE when
 * the packet exceeds the MTU and may not be fragmented.
 */
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			/* Charge the reallocated buffer to the socket. */
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			/* May rewrite first_hop (e.g. for a routing header). */
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	/* Socket hop limit of -1 means "use the route's default". */
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	/* First 32 bits: version 6, traffic class, flow label. */
	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	/* Too big and not allowed to fragment here: report PKT_TOOBIG to
	 * ourselves so the owning socket learns the path MTU. */
	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
 265
 266/*
 267 *      To avoid extra problems ND packets are send through this
 268 *      routine. It's code duplication but I really want to avoid
 269 *      extra checks since ipv6_build_header is used by TCP (which
 270 *      is for us performance critical)
 271 */
 272
/*
 * Build a bare IPv6 header for a neighbour-discovery packet: version 6,
 * zero traffic class and flow label, hop limit taken from the socket,
 * and the given addresses, protocol and payload length.
 *
 * Always returns 0.
 */
int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	/* Version 6 in the top nibble, everything else zero. */
	*(__be32*)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}
 298
/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * on ip6_ra_chain whose selector matches @sel (and whose bound device,
 * if any, matches the input device).  All but the last matching socket
 * receive clones; the last one consumes the original skb.
 *
 * Returns 1 when at least one socket took the packet, 0 otherwise.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			/* Deliver a clone to the previous match; keep the
			 * original for the final one to avoid a copy. */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
 327
/*
 * Decide how to handle a to-be-forwarded packet whose destination
 * matches a proxy neighbour entry.
 *
 * Returns 1 when the packet is an ND ICMPv6 message that should be
 * passed to the local input path, -1 when it must be dropped (link-
 * local destination; link failure has been signalled), 0 to forward
 * it normally.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	/* Skip any extension headers to find the transport protocol. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Make sure at least the ICMPv6 type byte is linear. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
 378
/* Final step of forwarding after the FORWARD netfilter hook: hand the
 * packet to the route's output function. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
 383
/*
 * Forward a packet not addressed to this host.  Performs policy and
 * sanity checks, handles the Router Alert and proxy-ND special cases,
 * decrements the hop limit, sends redirects where appropriate and runs
 * the packet through the FORWARD netfilter hook.
 *
 * Consumes @skb.  Returns 0 on success (or when the packet was handed
 * elsewhere) and a negative errno when it was dropped.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	struct neighbour *n;
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	/* Only forward frames that were link-level addressed to us. */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		/* ptr[2..3] hold the 16-bit Router Alert value. */
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm may have rerouted the packet: reload the dst. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	n = dst_get_neighbour(dst);
	if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr*)&n->primary_key;
		else
			target = &hdr->daddr;

		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (skb->len > mtu && !skb_is_gso(skb)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have reallocated the header. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
 540
/*
 * Copy per-packet metadata (packet type, priority, protocol, route,
 * device, mark, tc index, netfilter and security state) from one skb
 * to another.  Used when building fragments so each fragment carries
 * the original packet's context.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	/* Drop any stale route on the target before cloning the source's. */
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}
 561
 562int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 563{
 564        u16 offset = sizeof(struct ipv6hdr);
 565        struct ipv6_opt_hdr *exthdr =
 566                                (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
 567        unsigned int packet_len = skb->tail - skb->network_header;
 568        int found_rhdr = 0;
 569        *nexthdr = &ipv6_hdr(skb)->nexthdr;
 570
 571        while (offset + 1 <= packet_len) {
 572
 573                switch (**nexthdr) {
 574
 575                case NEXTHDR_HOP:
 576                        break;
 577                case NEXTHDR_ROUTING:
 578                        found_rhdr = 1;
 579                        break;
 580                case NEXTHDR_DEST:
 581#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 582                        if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
 583                                break;
 584#endif
 585                        if (found_rhdr)
 586                                return offset;
 587                        break;
 588                default :
 589                        return offset;
 590                }
 591
 592                offset += ipv6_optlen(exthdr);
 593                *nexthdr = &exthdr->nexthdr;
 594                exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
 595                                                 offset);
 596        }
 597
 598        return offset;
 599}
 600
/*
 * Choose the Identification value for a fragment header.  When the
 * route has an inet_peer (and does not opt out via DST_NOPEER) the id
 * comes from the peer's per-destination counter; otherwise a global
 * counter is advanced atomically, skipping the value 0.
 */
void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
{
	static atomic_t ipv6_fragmentation_id;
	int old, new;

	if (rt && !(rt->dst.flags & DST_NOPEER)) {
		struct inet_peer *peer;

		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 1);
		peer = rt->rt6i_peer;
		if (peer) {
			fhdr->identification = htonl(inet_getid(peer, 0));
			return;
		}
	}
	/* Lock-free increment: retry until the cmpxchg sees our old value. */
	do {
		old = atomic_read(&ipv6_fragmentation_id);
		new = old + 1;
		if (!new)
			new = 1;
	} while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
	fhdr->identification = htonl(new);
}
 625
 626int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 627{
 628        struct sk_buff *frag;
 629        struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
 630        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 631        struct ipv6hdr *tmp_hdr;
 632        struct frag_hdr *fh;
 633        unsigned int mtu, hlen, left, len;
 634        __be32 frag_id = 0;
 635        int ptr, offset = 0, err=0;
 636        u8 *prevhdr, nexthdr = 0;
 637        struct net *net = dev_net(skb_dst(skb)->dev);
 638
 639        hlen = ip6_find_1stfragopt(skb, &prevhdr);
 640        nexthdr = *prevhdr;
 641
 642        mtu = ip6_skb_dst_mtu(skb);
 643
 644        /* We must not fragment if the socket is set to force MTU discovery
 645         * or if the skb it not generated by a local socket.
 646         */
 647        if (!skb->local_df && skb->len > mtu) {
 648                skb->dev = skb_dst(skb)->dev;
 649                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 650                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 651                              IPSTATS_MIB_FRAGFAILS);
 652                kfree_skb(skb);
 653                return -EMSGSIZE;
 654        }
 655
 656        if (np && np->frag_size < mtu) {
 657                if (np->frag_size)
 658                        mtu = np->frag_size;
 659        }
 660        mtu -= hlen + sizeof(struct frag_hdr);
 661
 662        if (skb_has_frag_list(skb)) {
 663                int first_len = skb_pagelen(skb);
 664                struct sk_buff *frag2;
 665
 666                if (first_len - hlen > mtu ||
 667                    ((first_len - hlen) & 7) ||
 668                    skb_cloned(skb))
 669                        goto slow_path;
 670
 671                skb_walk_frags(skb, frag) {
 672                        /* Correct geometry. */
 673                        if (frag->len > mtu ||
 674                            ((frag->len & 7) && frag->next) ||
 675                            skb_headroom(frag) < hlen)
 676                                goto slow_path_clean;
 677
 678                        /* Partially cloned skb? */
 679                        if (skb_shared(frag))
 680                                goto slow_path_clean;
 681
 682                        BUG_ON(frag->sk);
 683                        if (skb->sk) {
 684                                frag->sk = skb->sk;
 685                                frag->destructor = sock_wfree;
 686                        }
 687                        skb->truesize -= frag->truesize;
 688                }
 689
 690                err = 0;
 691                offset = 0;
 692                frag = skb_shinfo(skb)->frag_list;
 693                skb_frag_list_init(skb);
 694                /* BUILD HEADER */
 695
 696                *prevhdr = NEXTHDR_FRAGMENT;
 697                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 698                if (!tmp_hdr) {
 699                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 700                                      IPSTATS_MIB_FRAGFAILS);
 701                        return -ENOMEM;
 702                }
 703
 704                __skb_pull(skb, hlen);
 705                fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
 706                __skb_push(skb, hlen);
 707                skb_reset_network_header(skb);
 708                memcpy(skb_network_header(skb), tmp_hdr, hlen);
 709
 710                ipv6_select_ident(fh, rt);
 711                fh->nexthdr = nexthdr;
 712                fh->reserved = 0;
 713                fh->frag_off = htons(IP6_MF);
 714                frag_id = fh->identification;
 715
 716                first_len = skb_pagelen(skb);
 717                skb->data_len = first_len - skb_headlen(skb);
 718                skb->len = first_len;
 719                ipv6_hdr(skb)->payload_len = htons(first_len -
 720                                                   sizeof(struct ipv6hdr));
 721
 722                dst_hold(&rt->dst);
 723
 724                for (;;) {
 725                        /* Prepare header of the next frame,
 726                         * before previous one went down. */
 727                        if (frag) {
 728                                frag->ip_summed = CHECKSUM_NONE;
 729                                skb_reset_transport_header(frag);
 730                                fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
 731                                __skb_push(frag, hlen);
 732                                skb_reset_network_header(frag);
 733                                memcpy(skb_network_header(frag), tmp_hdr,
 734                                       hlen);
 735                                offset += skb->len - hlen - sizeof(struct frag_hdr);
 736                                fh->nexthdr = nexthdr;
 737                                fh->reserved = 0;
 738                                fh->frag_off = htons(offset);
 739                                if (frag->next != NULL)
 740                                        fh->frag_off |= htons(IP6_MF);
 741                                fh->identification = frag_id;
 742                                ipv6_hdr(frag)->payload_len =
 743                                                htons(frag->len -
 744                                                      sizeof(struct ipv6hdr));
 745                                ip6_copy_metadata(frag, skb);
 746                        }
 747
 748                        err = output(skb);
 749                        if(!err)
 750                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 751                                              IPSTATS_MIB_FRAGCREATES);
 752
 753                        if (err || !frag)
 754                                break;
 755
 756                        skb = frag;
 757                        frag = skb->next;
 758                        skb->next = NULL;
 759                }
 760
 761                kfree(tmp_hdr);
 762
 763                if (err == 0) {
 764                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 765                                      IPSTATS_MIB_FRAGOKS);
 766                        dst_release(&rt->dst);
 767                        return 0;
 768                }
 769
 770                while (frag) {
 771                        skb = frag->next;
 772                        kfree_skb(frag);
 773                        frag = skb;
 774                }
 775
 776                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 777                              IPSTATS_MIB_FRAGFAILS);
 778                dst_release(&rt->dst);
 779                return err;
 780
 781slow_path_clean:
 782                skb_walk_frags(skb, frag2) {
 783                        if (frag2 == frag)
 784                                break;
 785                        frag2->sk = NULL;
 786                        frag2->destructor = NULL;
 787                        skb->truesize += frag2->truesize;
 788                }
 789        }
 790
 791slow_path:
 792        left = skb->len - hlen;         /* Space per frame */
 793        ptr = hlen;                     /* Where to start from */
 794
 795        /*
 796         *      Fragment the datagram.
 797         */
 798
 799        *prevhdr = NEXTHDR_FRAGMENT;
 800
 801        /*
 802         *      Keep copying data until we run out.
 803         */
 804        while(left > 0) {
 805                len = left;
 806                /* IF: it doesn't fit, use 'mtu' - the data space left */
 807                if (len > mtu)
 808                        len = mtu;
 809                /* IF: we are not sending up to and including the packet end
 810                   then align the next start on an eight byte boundary */
 811                if (len < left) {
 812                        len &= ~7;
 813                }
 814                /*
 815                 *      Allocate buffer.
 816                 */
 817
 818                if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
 819                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
 820                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 821                                      IPSTATS_MIB_FRAGFAILS);
 822                        err = -ENOMEM;
 823                        goto fail;
 824                }
 825
 826                /*
 827                 *      Set up data on packet
 828                 */
 829
 830                ip6_copy_metadata(frag, skb);
 831                skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
 832                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 833                skb_reset_network_header(frag);
 834                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
 835                frag->transport_header = (frag->network_header + hlen +
 836                                          sizeof(struct frag_hdr));
 837
 838                /*
 839                 *      Charge the memory for the fragment to any owner
 840                 *      it might possess
 841                 */
 842                if (skb->sk)
 843                        skb_set_owner_w(frag, skb->sk);
 844
 845                /*
 846                 *      Copy the packet header into the new buffer.
 847                 */
 848                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 849
 850                /*
 851                 *      Build fragment header.
 852                 */
 853                fh->nexthdr = nexthdr;
 854                fh->reserved = 0;
 855                if (!frag_id) {
 856                        ipv6_select_ident(fh, rt);
 857                        frag_id = fh->identification;
 858                } else
 859                        fh->identification = frag_id;
 860
 861                /*
 862                 *      Copy a block of the IP datagram.
 863                 */
 864                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
 865                        BUG();
 866                left -= len;
 867
 868                fh->frag_off = htons(offset);
 869                if (left > 0)
 870                        fh->frag_off |= htons(IP6_MF);
 871                ipv6_hdr(frag)->payload_len = htons(frag->len -
 872                                                    sizeof(struct ipv6hdr));
 873
 874                ptr += len;
 875                offset += len;
 876
 877                /*
 878                 *      Put this fragment into the sending queue.
 879                 */
 880                err = output(frag);
 881                if (err)
 882                        goto fail;
 883
 884                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 885                              IPSTATS_MIB_FRAGCREATES);
 886        }
 887        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 888                      IPSTATS_MIB_FRAGOKS);
 889        kfree_skb(skb);
 890        return err;
 891
 892fail:
 893        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 894                      IPSTATS_MIB_FRAGFAILS);
 895        kfree_skb(skb);
 896        return err;
 897}
 898
 899static inline int ip6_rt_check(const struct rt6key *rt_key,
 900                               const struct in6_addr *fl_addr,
 901                               const struct in6_addr *addr_cache)
 902{
 903        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 904                (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
 905}
 906
 907static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 908                                          struct dst_entry *dst,
 909                                          const struct flowi6 *fl6)
 910{
 911        struct ipv6_pinfo *np = inet6_sk(sk);
 912        struct rt6_info *rt = (struct rt6_info *)dst;
 913
 914        if (!dst)
 915                goto out;
 916
 917        /* Yes, checking route validity in not connected
 918         * case is not very simple. Take into account,
 919         * that we do not support routing by source, TOS,
 920         * and MSG_DONTROUTE            --ANK (980726)
 921         *
 922         * 1. ip6_rt_check(): If route was host route,
 923         *    check that cached destination is current.
 924         *    If it is network route, we still may
 925         *    check its validity using saved pointer
 926         *    to the last used address: daddr_cache.
 927         *    We do not want to save whole address now,
 928         *    (because main consumer of this service
 929         *    is tcp, which has not this problem),
 930         *    so that the last trick works only on connected
 931         *    sockets.
 932         * 2. oif also should be the same.
 933         */
 934        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
 935#ifdef CONFIG_IPV6_SUBTREES
 936            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
 937#endif
 938            (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
 939                dst_release(dst);
 940                dst = NULL;
 941        }
 942
 943out:
 944        return dst;
 945}
 946
/*
 * Complete a route lookup for @fl6: perform the routing-table lookup when
 * the caller did not supply a dst, fill in a source address when the flow
 * has none, and (when CONFIG_IPV6_OPTIMISTIC_DAD is set) re-route via the
 * default router if the neighbour entry is not yet valid and the chosen
 * source address is still optimistic (DAD in progress).
 *
 * Returns 0 with a held dst in *dst on success; on failure releases *dst,
 * sets it to NULL and returns a negative errno.
 */
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
#endif
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	/* Lookup failures are carried inside the returned dst itself. */
	if ((err = (*dst)->error))
		goto out_err_release;

	/* Flow has no source address yet: derive one from the route. */
	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rcu_read_lock();
	n = dst_get_neighbour(*dst);
	if (n && !(n->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		/* n is not dereferenced past this point. */
		rcu_read_unlock();
		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			/* Clearing daddr makes the lookup resolve to the
			 * default route / nexthop router. */
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	} else {
		rcu_read_unlock();
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}
1021
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error,
 *	in which case *@dst is set to NULL.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
	/* Ignore any dst the caller left in *dst; always look up fresh. */
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1038
1039/**
1040 *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1041 *      @sk: socket which provides route info
1042 *      @fl6: flow to lookup
1043 *      @final_dst: final destination address for ipsec lookup
1044 *      @can_sleep: we are in a sleepable context
1045 *
1046 *      This function performs a route lookup on the given flow.
1047 *
1048 *      It returns a valid dst pointer on success, or a pointer encoded
1049 *      error code.
1050 */
1051struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1052                                      const struct in6_addr *final_dst,
1053                                      bool can_sleep)
1054{
1055        struct dst_entry *dst = NULL;
1056        int err;
1057
1058        err = ip6_dst_lookup_tail(sk, &dst, fl6);
1059        if (err)
1060                return ERR_PTR(err);
1061        if (final_dst)
1062                ipv6_addr_copy(&fl6->daddr, final_dst);
1063        if (can_sleep)
1064                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1065
1066        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1067}
1068EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1069
1070/**
1071 *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1072 *      @sk: socket which provides the dst cache and route info
1073 *      @fl6: flow to lookup
1074 *      @final_dst: final destination address for ipsec lookup
1075 *      @can_sleep: we are in a sleepable context
1076 *
1077 *      This function performs a route lookup on the given flow with the
1078 *      possibility of using the cached route in the socket if it is valid.
1079 *      It will take the socket dst lock when operating on the dst cache.
1080 *      As a result, this function can only be used in process context.
1081 *
1082 *      It returns a valid dst pointer on success, or a pointer encoded
1083 *      error code.
1084 */
1085struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1086                                         const struct in6_addr *final_dst,
1087                                         bool can_sleep)
1088{
1089        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1090        int err;
1091
1092        dst = ip6_sk_dst_check(sk, dst, fl6);
1093
1094        err = ip6_dst_lookup_tail(sk, &dst, fl6);
1095        if (err)
1096                return ERR_PTR(err);
1097        if (final_dst)
1098                ipv6_addr_copy(&fl6->daddr, final_dst);
1099        if (can_sleep)
1100                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1101
1102        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1103}
1104EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1105
/*
 * ip6_ufo_append_data - queue data as one oversized skb for UDP
 * fragmentation offload (the device segments it into fragments).
 *
 * Allocates the initial header skb when the write queue is empty,
 * then appends @length - @transhdrlen payload bytes (fetched through
 * @getfrag) as page fragments.  Returns 0 on success or a negative
 * errno.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu,unsigned int flags,
			struct rt6_info *rt)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		/* NOTE(review): the extra 20 bytes of headroom appear to
		 * be slack beyond hh_len + headers — confirm intent. */
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb,fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		/* transport checksum is left for the device to compute */
		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
	}

	/* Payload goes into page fragments, not the linear area. */
	err = skb_append_datato_frags(sk,skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		/* Choose the fragment identification once and stash it in
		 * shinfo for the segmentation code. */
		ipv6_select_ident(&fhdr, rt);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		/* NOTE(review): if the queue was non-empty, @skb is the
		 * already-queued tail; queueing it again here on success
		 * and kfree_skb()ing it below on failure both look unsafe
		 * for that case — verify callers never reach this path
		 * with a non-empty write queue. */
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support do UPD LSO,
	 * so follow normal path
	 */
	kfree_skb(skb);

	return err;
}
1168
1169static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1170                                               gfp_t gfp)
1171{
1172        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1173}
1174
1175static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1176                                                gfp_t gfp)
1177{
1178        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1179}
1180
/*
 *	ip6_append_data - append data to the pending (corked) output queue
 *
 *	Buffers @length bytes, fetched through @getfrag, on
 *	sk->sk_write_queue as skbs sized to the path MTU / fragment
 *	boundary; ip6_push_pending_frames() later turns the queue into
 *	IPv6 packets.  The first call on an empty queue establishes the
 *	cork state (duplicated extension-header options, held route,
 *	cached flow, MTU); subsequent calls reuse that state and the
 *	passed @rt/@fl6/@opt are replaced by the corked copies.
 *
 *	Returns 0 on success or a negative errno (the corked byte count
 *	is rolled back on error).
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;		/* read offset into the caller's data */
	int csummode = CHECKSUM_NONE;
	__u8 tx_flags = 0;

	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			/* NOTE(review): the -ENOBUFS returns below leave
			 * np->cork.opt (and any options already duplicated)
			 * allocated; presumably ip6_cork_release() frees
			 * them later — confirm callers flush the cork on
			 * error. */
			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		/* Cork the route and flow for the lifetime of the queue. */
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
		/* A configured per-socket frag size caps the MTU. */
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len;
	} else {
		/* Queue already corked: reuse the saved state and ignore
		 * the rt/fl6/opt arguments passed by the caller. */
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest skb length such that the payload past the headers stays
	 * 8-byte aligned once a Fragment header is inserted. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM) {
		err = sock_tx_timestamp(sk, &tx_flags);
		if (err)
			goto error;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (length > mtu) {
		int proto = sk->sk_protocol;
		/* IPV6_DONTFRAG: report the path MTU instead of fragmenting. */
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
			ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}

		/* Oversized UDP on a UFO-capable device: hand the whole
		 * datagram to the offload path instead of fragmenting here. */
		if (proto == IPPROTO_UDP &&
		    (rt->dst.dev->features & NETIF_F_UFO)) {

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags, rt);
			if (err)
				goto error;
			return 0;
		}
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MODE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Follow-up fragments: only allocate while
				 * within twice the send buffer limit. */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen + dst_exthdrlen);
			skb_set_network_header(skb, exthdrlen + dst_exthdrlen);
			data += fragheaderlen + dst_exthdrlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the unaligned tail of the previous skb
				 * into this one and patch its checksum. */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy into the linear area. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: copy into the socket's current
			 * send page, allocating a fresh page when full. */
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != skb_frag_page(frag)) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					skb_frag_ref(skb, i);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if(i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from,
				    skb_frag_address(frag) + skb_frag_size(frag),
				    offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			/* Account the appended bytes against the skb and
			 * the socket's write allocation. */
			sk->sk_sndmsg_off += copy;
			skb_frag_size_add(frag, copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	/* Roll back the bytes we failed to queue. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
1534
1535static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1536{
1537        if (np->cork.opt) {
1538                kfree(np->cork.opt->dst0opt);
1539                kfree(np->cork.opt->dst1opt);
1540                kfree(np->cork.opt->hopopt);
1541                kfree(np->cork.opt->srcrt);
1542                kfree(np->cork.opt);
1543                np->cork.opt = NULL;
1544        }
1545
1546        if (inet->cork.base.dst) {
1547                dst_release(inet->cork.base.dst);
1548                inet->cork.base.dst = NULL;
1549                inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1550        }
1551        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1552}
1553
/*
 * Coalesce all pending skbs queued on the socket's write queue into a
 * single skb (head + frag_list), prepend the queued extension headers
 * and the IPv6 header from the corked flow state, and transmit it via
 * ip6_local_out().  Releases the cork state in all cases.
 *
 * Returns 0 on success or a negative errno from the output path.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	/* Nothing queued: just release the cork and return. */
	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain every remaining queued skb onto the head skb's frag_list,
	 * stripping their (duplicated) network headers and folding their
	 * length/truesize accounting into the head skb. */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		/* Ownership accounting stays with the head skb only. */
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	/* Remember the flow's destination: a routing header in the
	 * non-fragmentable options may rewrite the header daddr. */
	ipv6_addr_copy(final_dst, &fl6->daddr);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	/* Build the IPv6 header in front of the options. */
	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* First 32 bits: version 6, corked traffic class, flow label. */
	*(__be32*)hdr = fl6->flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	/* Attach the cached route; take an extra reference since the
	 * cork keeps its own until ip6_cork_release(). */
	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		/* Positive values from the qdisc layer are congestion
		 * notifications; net_xmit_errno() maps them to 0/-ENOBUFS. */
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
1637
1638void ip6_flush_pending_frames(struct sock *sk)
1639{
1640        struct sk_buff *skb;
1641
1642        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1643                if (skb_dst(skb))
1644                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1645                                      IPSTATS_MIB_OUTDISCARDS);
1646                kfree_skb(skb);
1647        }
1648
1649        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1650}
1651