/* linux/net/ipv6/ip6_output.c */
   1/*
   2 *      IPv6 output functions
   3 *      Linux INET6 implementation
   4 *
   5 *      Authors:
   6 *      Pedro Roque             <roque@di.fc.ul.pt>
   7 *
   8 *      Based on linux/net/ipv4/ip_output.c
   9 *
  10 *      This program is free software; you can redistribute it and/or
  11 *      modify it under the terms of the GNU General Public License
  12 *      as published by the Free Software Foundation; either version
  13 *      2 of the License, or (at your option) any later version.
  14 *
  15 *      Changes:
  16 *      A.N.Kuznetsov   :       airthmetics in fragmentation.
  17 *                              extension headers are implemented.
  18 *                              route changes now work.
  19 *                              ip6_forward does not confuse sniffers.
  20 *                              etc.
  21 *
  22 *      H. von Brand    :       Added missing #include <linux/string.h>
  23 *      Imran Patel     :       frag id should be in NBO
  24 *      Kazunori MIYAZAWA @USAGI
  25 *                      :       add ip6_append_data and related functions
  26 *                              for datagram xmit
  27 */
  28
  29#include <linux/errno.h>
  30#include <linux/kernel.h>
  31#include <linux/string.h>
  32#include <linux/socket.h>
  33#include <linux/net.h>
  34#include <linux/netdevice.h>
  35#include <linux/if_arp.h>
  36#include <linux/in6.h>
  37#include <linux/tcp.h>
  38#include <linux/route.h>
  39#include <linux/module.h>
  40#include <linux/slab.h>
  41
  42#include <linux/netfilter.h>
  43#include <linux/netfilter_ipv6.h>
  44
  45#include <net/sock.h>
  46#include <net/snmp.h>
  47
  48#include <net/ipv6.h>
  49#include <net/ndisc.h>
  50#include <net/protocol.h>
  51#include <net/ip6_route.h>
  52#include <net/addrconf.h>
  53#include <net/rawv6.h>
  54#include <net/icmp.h>
  55#include <net/xfrm.h>
  56#include <net/checksum.h>
  57#include <linux/mroute6.h>
  58
  59int __ip6_local_out(struct sk_buff *skb)
  60{
  61        int len;
  62
  63        len = skb->len - sizeof(struct ipv6hdr);
  64        if (len > IPV6_MAXPLEN)
  65                len = 0;
  66        ipv6_hdr(skb)->payload_len = htons(len);
  67
  68        return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
  69                       skb_dst(skb)->dev, dst_output);
  70}
  71
  72int ip6_local_out(struct sk_buff *skb)
  73{
  74        int err;
  75
  76        err = __ip6_local_out(skb);
  77        if (likely(err == 1))
  78                err = dst_output(skb);
  79
  80        return err;
  81}
  82EXPORT_SYMBOL_GPL(ip6_local_out);
  83
/*
 * Last step of the IPv6 output path: resolve the nexthop neighbour
 * and hand the skb to the device, duplicating multicast packets back
 * to the local stack when loopback delivery is required.
 *
 * Returns the neighbour output result, 0 when a multicast packet is
 * absorbed locally, or -EINVAL when no neighbour entry can be made.
 */
static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a clone back to local listeners when the socket
		 * requested multicast loopback and either an mroute6
		 * socket exists (and the packet was not already
		 * forwarded) or this host is a member of the group.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0: local delivery only, never on the
			 * wire — count as a discard for the wire copy.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				skb->len);

		/* Node-local scope multicast must never leave the node. */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* Neighbour lookup runs under rcu_read_lock_bh(); create an
	 * entry on demand when none is cached for this nexthop.
	 */
	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
 149
 150static int ip6_finish_output(struct sk_buff *skb)
 151{
 152        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 153            dst_allfrag(skb_dst(skb)))
 154                return ip6_fragment(skb, ip6_finish_output2);
 155        else
 156                return ip6_finish_output2(skb);
 157}
 158
 159int ip6_output(struct sk_buff *skb)
 160{
 161        struct net_device *dev = skb_dst(skb)->dev;
 162        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 163        if (unlikely(idev->cnf.disable_ipv6)) {
 164                IP6_INC_STATS(dev_net(dev), idev,
 165                              IPSTATS_MIB_OUTDISCARDS);
 166                kfree_skb(skb);
 167                return 0;
 168        }
 169
 170        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
 171                            ip6_finish_output,
 172                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 173}
 174
 175/*
 176 *      xmit an sk_buff (used by TCP, SCTP and DCCP)
 177 */
 178
/*
 * Prepend extension headers and the fixed IPv6 header to an
 * already-routed skb and send it through the LOCAL_OUT hook.
 * Used by connection-oriented protocols (TCP, SCTP, DCCP).
 *
 * The skb is consumed on every path.  Returns the transmit result,
 * -ENOBUFS on headroom reallocation failure, or -EMSGSIZE when the
 * packet exceeds the path MTU and may not be fragmented.
 */
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			/* re-charge the replacement skb to the socket's
			 * send buffer accounting
			 */
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			/* may rewrite first_hop (e.g. via a routing header) */
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	/* Socket did not pin a hop limit: use the route's default. */
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, fl6->flowlabel);

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	/* Too big and not allowed to fragment locally: report EMSGSIZE
	 * to the owning socket and drop.
	 */
	skb->dev = dst->dev;
	ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
 260
 261static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 262{
 263        struct ip6_ra_chain *ra;
 264        struct sock *last = NULL;
 265
 266        read_lock(&ip6_ra_lock);
 267        for (ra = ip6_ra_chain; ra; ra = ra->next) {
 268                struct sock *sk = ra->sk;
 269                if (sk && ra->sel == sel &&
 270                    (!sk->sk_bound_dev_if ||
 271                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
 272                        if (last) {
 273                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 274                                if (skb2)
 275                                        rawv6_rcv(last, skb2);
 276                        }
 277                        last = sk;
 278                }
 279        }
 280
 281        if (last) {
 282                rawv6_rcv(last, skb);
 283                read_unlock(&ip6_ra_lock);
 284                return 1;
 285        }
 286        read_unlock(&ip6_ra_lock);
 287        return 0;
 288}
 289
 290static int ip6_forward_proxy_check(struct sk_buff *skb)
 291{
 292        struct ipv6hdr *hdr = ipv6_hdr(skb);
 293        u8 nexthdr = hdr->nexthdr;
 294        __be16 frag_off;
 295        int offset;
 296
 297        if (ipv6_ext_hdr(nexthdr)) {
 298                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
 299                if (offset < 0)
 300                        return 0;
 301        } else
 302                offset = sizeof(struct ipv6hdr);
 303
 304        if (nexthdr == IPPROTO_ICMPV6) {
 305                struct icmp6hdr *icmp6;
 306
 307                if (!pskb_may_pull(skb, (skb_network_header(skb) +
 308                                         offset + 1 - skb->data)))
 309                        return 0;
 310
 311                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 312
 313                switch (icmp6->icmp6_type) {
 314                case NDISC_ROUTER_SOLICITATION:
 315                case NDISC_ROUTER_ADVERTISEMENT:
 316                case NDISC_NEIGHBOUR_SOLICITATION:
 317                case NDISC_NEIGHBOUR_ADVERTISEMENT:
 318                case NDISC_REDIRECT:
 319                        /* For reaction involving unicast neighbor discovery
 320                         * message destined to the proxied address, pass it to
 321                         * input function.
 322                         */
 323                        return 1;
 324                default:
 325                        break;
 326                }
 327        }
 328
 329        /*
 330         * The proxying router can't forward traffic sent to a link-local
 331         * address, so signal the sender and discard the packet. This
 332         * behavior is clarified by the MIPv6 specification.
 333         */
 334        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
 335                dst_link_failure(skb);
 336                return -1;
 337        }
 338
 339        return 0;
 340}
 341
/* Final step of the forward path after the NF_INET_FORWARD hook:
 * hand the packet to the route's output function.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
 346
/*
 * Forward an incoming, non-local IPv6 packet.
 *
 * Applies forwarding policy (sysctl, LRO, xfrm), delivers Router
 * Alert packets to registered sockets, handles hop-limit expiry,
 * proxy NDP, redirect generation and the path-MTU check, then
 * decrements hop_limit and re-queues the packet through the
 * NF_INET_FORWARD hook.  The skb is consumed on every path.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* Forwarding LRO-merged super-packets would corrupt them. */
	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	/* Only forward packets actually addressed to us at L2. */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without ane WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm may have replaced the route; re-read it. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	/* IPv6 mandates that every link carries at least IPV6_MIN_MTU. */
	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
	    (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow may have reallocated the header; re-read it. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
 504
/*
 * Copy per-packet metadata (type, priority, protocol, route, device,
 * mark, scheduling/netfilter/security state) from @from to @to so a
 * freshly allocated fragment behaves like the original packet.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	/* Replace whatever dst 'to' carries with a ref on 'from's. */
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}
 524
 525int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 526{
 527        struct sk_buff *frag;
 528        struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
 529        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 530        struct ipv6hdr *tmp_hdr;
 531        struct frag_hdr *fh;
 532        unsigned int mtu, hlen, left, len;
 533        int hroom, troom;
 534        __be32 frag_id = 0;
 535        int ptr, offset = 0, err=0;
 536        u8 *prevhdr, nexthdr = 0;
 537        struct net *net = dev_net(skb_dst(skb)->dev);
 538
 539        hlen = ip6_find_1stfragopt(skb, &prevhdr);
 540        nexthdr = *prevhdr;
 541
 542        mtu = ip6_skb_dst_mtu(skb);
 543
 544        /* We must not fragment if the socket is set to force MTU discovery
 545         * or if the skb it not generated by a local socket.
 546         */
 547        if (unlikely(!skb->local_df && skb->len > mtu) ||
 548                     (IP6CB(skb)->frag_max_size &&
 549                      IP6CB(skb)->frag_max_size > mtu)) {
 550                if (skb->sk && dst_allfrag(skb_dst(skb)))
 551                        sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
 552
 553                skb->dev = skb_dst(skb)->dev;
 554                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 555                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 556                              IPSTATS_MIB_FRAGFAILS);
 557                kfree_skb(skb);
 558                return -EMSGSIZE;
 559        }
 560
 561        if (np && np->frag_size < mtu) {
 562                if (np->frag_size)
 563                        mtu = np->frag_size;
 564        }
 565        mtu -= hlen + sizeof(struct frag_hdr);
 566
 567        if (skb_has_frag_list(skb)) {
 568                int first_len = skb_pagelen(skb);
 569                struct sk_buff *frag2;
 570
 571                if (first_len - hlen > mtu ||
 572                    ((first_len - hlen) & 7) ||
 573                    skb_cloned(skb))
 574                        goto slow_path;
 575
 576                skb_walk_frags(skb, frag) {
 577                        /* Correct geometry. */
 578                        if (frag->len > mtu ||
 579                            ((frag->len & 7) && frag->next) ||
 580                            skb_headroom(frag) < hlen)
 581                                goto slow_path_clean;
 582
 583                        /* Partially cloned skb? */
 584                        if (skb_shared(frag))
 585                                goto slow_path_clean;
 586
 587                        BUG_ON(frag->sk);
 588                        if (skb->sk) {
 589                                frag->sk = skb->sk;
 590                                frag->destructor = sock_wfree;
 591                        }
 592                        skb->truesize -= frag->truesize;
 593                }
 594
 595                err = 0;
 596                offset = 0;
 597                frag = skb_shinfo(skb)->frag_list;
 598                skb_frag_list_init(skb);
 599                /* BUILD HEADER */
 600
 601                *prevhdr = NEXTHDR_FRAGMENT;
 602                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 603                if (!tmp_hdr) {
 604                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 605                                      IPSTATS_MIB_FRAGFAILS);
 606                        return -ENOMEM;
 607                }
 608
 609                __skb_pull(skb, hlen);
 610                fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
 611                __skb_push(skb, hlen);
 612                skb_reset_network_header(skb);
 613                memcpy(skb_network_header(skb), tmp_hdr, hlen);
 614
 615                ipv6_select_ident(fh, rt);
 616                fh->nexthdr = nexthdr;
 617                fh->reserved = 0;
 618                fh->frag_off = htons(IP6_MF);
 619                frag_id = fh->identification;
 620
 621                first_len = skb_pagelen(skb);
 622                skb->data_len = first_len - skb_headlen(skb);
 623                skb->len = first_len;
 624                ipv6_hdr(skb)->payload_len = htons(first_len -
 625                                                   sizeof(struct ipv6hdr));
 626
 627                dst_hold(&rt->dst);
 628
 629                for (;;) {
 630                        /* Prepare header of the next frame,
 631                         * before previous one went down. */
 632                        if (frag) {
 633                                frag->ip_summed = CHECKSUM_NONE;
 634                                skb_reset_transport_header(frag);
 635                                fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
 636                                __skb_push(frag, hlen);
 637                                skb_reset_network_header(frag);
 638                                memcpy(skb_network_header(frag), tmp_hdr,
 639                                       hlen);
 640                                offset += skb->len - hlen - sizeof(struct frag_hdr);
 641                                fh->nexthdr = nexthdr;
 642                                fh->reserved = 0;
 643                                fh->frag_off = htons(offset);
 644                                if (frag->next != NULL)
 645                                        fh->frag_off |= htons(IP6_MF);
 646                                fh->identification = frag_id;
 647                                ipv6_hdr(frag)->payload_len =
 648                                                htons(frag->len -
 649                                                      sizeof(struct ipv6hdr));
 650                                ip6_copy_metadata(frag, skb);
 651                        }
 652
 653                        err = output(skb);
 654                        if(!err)
 655                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 656                                              IPSTATS_MIB_FRAGCREATES);
 657
 658                        if (err || !frag)
 659                                break;
 660
 661                        skb = frag;
 662                        frag = skb->next;
 663                        skb->next = NULL;
 664                }
 665
 666                kfree(tmp_hdr);
 667
 668                if (err == 0) {
 669                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 670                                      IPSTATS_MIB_FRAGOKS);
 671                        ip6_rt_put(rt);
 672                        return 0;
 673                }
 674
 675                while (frag) {
 676                        skb = frag->next;
 677                        kfree_skb(frag);
 678                        frag = skb;
 679                }
 680
 681                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 682                              IPSTATS_MIB_FRAGFAILS);
 683                ip6_rt_put(rt);
 684                return err;
 685
 686slow_path_clean:
 687                skb_walk_frags(skb, frag2) {
 688                        if (frag2 == frag)
 689                                break;
 690                        frag2->sk = NULL;
 691                        frag2->destructor = NULL;
 692                        skb->truesize += frag2->truesize;
 693                }
 694        }
 695
 696slow_path:
 697        if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
 698            skb_checksum_help(skb))
 699                goto fail;
 700
 701        left = skb->len - hlen;         /* Space per frame */
 702        ptr = hlen;                     /* Where to start from */
 703
 704        /*
 705         *      Fragment the datagram.
 706         */
 707
 708        *prevhdr = NEXTHDR_FRAGMENT;
 709        hroom = LL_RESERVED_SPACE(rt->dst.dev);
 710        troom = rt->dst.dev->needed_tailroom;
 711
 712        /*
 713         *      Keep copying data until we run out.
 714         */
 715        while(left > 0) {
 716                len = left;
 717                /* IF: it doesn't fit, use 'mtu' - the data space left */
 718                if (len > mtu)
 719                        len = mtu;
 720                /* IF: we are not sending up to and including the packet end
 721                   then align the next start on an eight byte boundary */
 722                if (len < left) {
 723                        len &= ~7;
 724                }
 725                /*
 726                 *      Allocate buffer.
 727                 */
 728
 729                if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
 730                                      hroom + troom, GFP_ATOMIC)) == NULL) {
 731                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
 732                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 733                                      IPSTATS_MIB_FRAGFAILS);
 734                        err = -ENOMEM;
 735                        goto fail;
 736                }
 737
 738                /*
 739                 *      Set up data on packet
 740                 */
 741
 742                ip6_copy_metadata(frag, skb);
 743                skb_reserve(frag, hroom);
 744                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 745                skb_reset_network_header(frag);
 746                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
 747                frag->transport_header = (frag->network_header + hlen +
 748                                          sizeof(struct frag_hdr));
 749
 750                /*
 751                 *      Charge the memory for the fragment to any owner
 752                 *      it might possess
 753                 */
 754                if (skb->sk)
 755                        skb_set_owner_w(frag, skb->sk);
 756
 757                /*
 758                 *      Copy the packet header into the new buffer.
 759                 */
 760                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 761
 762                /*
 763                 *      Build fragment header.
 764                 */
 765                fh->nexthdr = nexthdr;
 766                fh->reserved = 0;
 767                if (!frag_id) {
 768                        ipv6_select_ident(fh, rt);
 769                        frag_id = fh->identification;
 770                } else
 771                        fh->identification = frag_id;
 772
 773                /*
 774                 *      Copy a block of the IP datagram.
 775                 */
 776                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
 777                        BUG();
 778                left -= len;
 779
 780                fh->frag_off = htons(offset);
 781                if (left > 0)
 782                        fh->frag_off |= htons(IP6_MF);
 783                ipv6_hdr(frag)->payload_len = htons(frag->len -
 784                                                    sizeof(struct ipv6hdr));
 785
 786                ptr += len;
 787                offset += len;
 788
 789                /*
 790                 *      Put this fragment into the sending queue.
 791                 */
 792                err = output(frag);
 793                if (err)
 794                        goto fail;
 795
 796                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 797                              IPSTATS_MIB_FRAGCREATES);
 798        }
 799        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 800                      IPSTATS_MIB_FRAGOKS);
 801        consume_skb(skb);
 802        return err;
 803
 804fail:
 805        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 806                      IPSTATS_MIB_FRAGFAILS);
 807        kfree_skb(skb);
 808        return err;
 809}
 810
 811static inline int ip6_rt_check(const struct rt6key *rt_key,
 812                               const struct in6_addr *fl_addr,
 813                               const struct in6_addr *addr_cache)
 814{
 815        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 816                (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
 817}
 818
 819static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 820                                          struct dst_entry *dst,
 821                                          const struct flowi6 *fl6)
 822{
 823        struct ipv6_pinfo *np = inet6_sk(sk);
 824        struct rt6_info *rt;
 825
 826        if (!dst)
 827                goto out;
 828
 829        if (dst->ops->family != AF_INET6) {
 830                dst_release(dst);
 831                return NULL;
 832        }
 833
 834        rt = (struct rt6_info *)dst;
 835        /* Yes, checking route validity in not connected
 836         * case is not very simple. Take into account,
 837         * that we do not support routing by source, TOS,
 838         * and MSG_DONTROUTE            --ANK (980726)
 839         *
 840         * 1. ip6_rt_check(): If route was host route,
 841         *    check that cached destination is current.
 842         *    If it is network route, we still may
 843         *    check its validity using saved pointer
 844         *    to the last used address: daddr_cache.
 845         *    We do not want to save whole address now,
 846         *    (because main consumer of this service
 847         *    is tcp, which has not this problem),
 848         *    so that the last trick works only on connected
 849         *    sockets.
 850         * 2. oif also should be the same.
 851         */
 852        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
 853#ifdef CONFIG_IPV6_SUBTREES
 854            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
 855#endif
 856            (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
 857                dst_release(dst);
 858                dst = NULL;
 859        }
 860
 861out:
 862        return dst;
 863}
 864
/* Common tail of the dst-lookup helpers.
 *
 * Resolves a route for @fl6 into *dst (unless the caller already
 * supplied one), selects a source address when the flow has none, and,
 * with CONFIG_IPV6_OPTIMISTIC_DAD, re-routes via the default router
 * when the chosen source address is still optimistic and the nexthop
 * neighbour is not yet valid.
 *
 * Returns 0 on success.  On failure returns a negative errno and
 * leaves *dst released and set to NULL.
 */
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	/* ip6_route_output() never returns NULL; errors are carried
	 * in dst->error. */
	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		/* Only detour when the selected source address is
		 * still undergoing optimistic DAD. */
		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			/* An all-zero daddr makes the lookup hit the
			 * default route. */
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}
 941
 942/**
 943 *      ip6_dst_lookup - perform route lookup on flow
 944 *      @sk: socket which provides route info
 945 *      @dst: pointer to dst_entry * for result
 946 *      @fl6: flow to lookup
 947 *
 948 *      This function performs a route lookup on the given flow.
 949 *
 950 *      It returns zero on success, or a standard errno code on error.
 951 */
 952int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
 953{
 954        *dst = NULL;
 955        return ip6_dst_lookup_tail(sk, dst, fl6);
 956}
 957EXPORT_SYMBOL_GPL(ip6_dst_lookup);
 958
 959/**
 960 *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 961 *      @sk: socket which provides route info
 962 *      @fl6: flow to lookup
 963 *      @final_dst: final destination address for ipsec lookup
 964 *      @can_sleep: we are in a sleepable context
 965 *
 966 *      This function performs a route lookup on the given flow.
 967 *
 968 *      It returns a valid dst pointer on success, or a pointer encoded
 969 *      error code.
 970 */
 971struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 972                                      const struct in6_addr *final_dst,
 973                                      bool can_sleep)
 974{
 975        struct dst_entry *dst = NULL;
 976        int err;
 977
 978        err = ip6_dst_lookup_tail(sk, &dst, fl6);
 979        if (err)
 980                return ERR_PTR(err);
 981        if (final_dst)
 982                fl6->daddr = *final_dst;
 983        if (can_sleep)
 984                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
 985
 986        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
 987}
 988EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
 989
 990/**
 991 *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 992 *      @sk: socket which provides the dst cache and route info
 993 *      @fl6: flow to lookup
 994 *      @final_dst: final destination address for ipsec lookup
 995 *      @can_sleep: we are in a sleepable context
 996 *
 997 *      This function performs a route lookup on the given flow with the
 998 *      possibility of using the cached route in the socket if it is valid.
 999 *      It will take the socket dst lock when operating on the dst cache.
1000 *      As a result, this function can only be used in process context.
1001 *
1002 *      It returns a valid dst pointer on success, or a pointer encoded
1003 *      error code.
1004 */
1005struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1006                                         const struct in6_addr *final_dst,
1007                                         bool can_sleep)
1008{
1009        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1010        int err;
1011
1012        dst = ip6_sk_dst_check(sk, dst, fl6);
1013
1014        err = ip6_dst_lookup_tail(sk, &dst, fl6);
1015        if (err)
1016                return ERR_PTR(err);
1017        if (final_dst)
1018                fl6->daddr = *final_dst;
1019        if (can_sleep)
1020                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1021
1022        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1023}
1024EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1025
1026static inline int ip6_ufo_append_data(struct sock *sk,
1027                        int getfrag(void *from, char *to, int offset, int len,
1028                        int odd, struct sk_buff *skb),
1029                        void *from, int length, int hh_len, int fragheaderlen,
1030                        int transhdrlen, int mtu,unsigned int flags,
1031                        struct rt6_info *rt)
1032
1033{
1034        struct sk_buff *skb;
1035        int err;
1036
1037        /* There is support for UDP large send offload by network
1038         * device, so create one single skb packet containing complete
1039         * udp datagram
1040         */
1041        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1042                skb = sock_alloc_send_skb(sk,
1043                        hh_len + fragheaderlen + transhdrlen + 20,
1044                        (flags & MSG_DONTWAIT), &err);
1045                if (skb == NULL)
1046                        return err;
1047
1048                /* reserve space for Hardware header */
1049                skb_reserve(skb, hh_len);
1050
1051                /* create space for UDP/IP header */
1052                skb_put(skb,fragheaderlen + transhdrlen);
1053
1054                /* initialize network header pointer */
1055                skb_reset_network_header(skb);
1056
1057                /* initialize protocol header pointer */
1058                skb->transport_header = skb->network_header + fragheaderlen;
1059
1060                skb->ip_summed = CHECKSUM_PARTIAL;
1061                skb->csum = 0;
1062        }
1063
1064        err = skb_append_datato_frags(sk,skb, getfrag, from,
1065                                      (length - transhdrlen));
1066        if (!err) {
1067                struct frag_hdr fhdr;
1068
1069                /* Specify the length of each IPv6 datagram fragment.
1070                 * It has to be a multiple of 8.
1071                 */
1072                skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1073                                             sizeof(struct frag_hdr)) & ~7;
1074                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1075                ipv6_select_ident(&fhdr, rt);
1076                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1077                __skb_queue_tail(&sk->sk_write_queue, skb);
1078
1079                return 0;
1080        }
1081        /* There is not enough support do UPD LSO,
1082         * so follow normal path
1083         */
1084        kfree_skb(skb);
1085
1086        return err;
1087}
1088
1089static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1090                                               gfp_t gfp)
1091{
1092        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1093}
1094
1095static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1096                                                gfp_t gfp)
1097{
1098        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1099}
1100
/* Recompute *mtu and *maxfraglen when ip6_append_data() starts a new
 * skb.  Only acts when the route is not an XFRM tunnel: the first
 * fragment must reserve room for the IPsec headers (dst.header_len),
 * while later fragments may use the full path MTU.
 *
 * NOTE(review): for non-first fragments *mtu is taken directly from
 * dst_mtu(rt->dst.path) and is not clamped by np->frag_size the way
 * the initial mtu in ip6_append_data() is - confirm this is intended.
 */
static void ip6_append_data_mtu(int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (skb == NULL) {
			/* first fragment, reserve header_len */
			*mtu = *mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = dst_mtu(rt->dst.path);
		}
		/* Fragmentable part must be a multiple of 8 octets and
		 * leave room for the fragment header. */
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}
1123
/* Append user data to the socket's pending-output queue
 * (sk->sk_write_queue), splitting it into MTU-sized skbs so that
 * ip6_push_pending_frames() can later emit them as one datagram.
 *
 * On the first call for a datagram (empty write queue) the cork state
 * is set up: extension headers are duplicated, the route is pinned and
 * the effective mtu is computed.  Subsequent calls (corked socket)
 * reuse that state and ignore the opt/hlimit/tclass/rt arguments.
 *
 * @getfrag copies user data into the skb; @dontfrag refuses to build
 * datagrams larger than the mtu for UDP/RAW.  Returns 0 on success or
 * a negative errno.
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;

	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			/* On any dup failure below, the partially filled
			 * np->cork.opt is left in place; presumably the
			 * caller flushes pending frames, which frees it
			 * via ip6_cork_release() - verify against callers. */
			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		/* IPV6_PMTUDISC_PROBE uses the raw device mtu; otherwise
		 * the (possibly xfrm-reduced) route mtu applies. */
		if (rt->dst.flags & DST_XFRM_TUNNEL)
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(&rt->dst);
		else
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		/* A user-configured frag_size may shrink the mtu further. */
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	} else {
		/* Corked socket: reuse the state captured on the first
		 * append; caller-supplied rt/opt/fl6 are ignored. */
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM)
		sock_tx_timestamp(sk, &tx_flags);

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (length > mtu) {
		int proto = sk->sk_protocol;
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
			ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}

		/* Oversized UDP on a UFO-capable device: hand the whole
		 * datagram to the offload path instead of fragmenting
		 * here. */
		if (proto == IPPROTO_UDP &&
		    (rt->dst.dev->features & NETIF_F_UFO)) {

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags, rt);
			if (err)
				goto error;
			return 0;
		}
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (skb == NULL || skb_prev == NULL)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				/* First fragment: may block per MSG_DONTWAIT. */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Later fragments: allow up to twice sndbuf
				 * before refusing further allocations. */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the 8-byte-alignment overhang from the
				 * previous skb into this one, keeping the
				 * running checksums consistent. */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy into the linear area. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: copy into per-socket page
			 * fragments, coalescing with the last frag when
			 * possible. */
			int i = skb_shinfo(skb)->nr_frags;
			struct page_frag *pfrag = sk_page_frag(sk);

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	/* Roll back the optimistic length accounting done above. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1468
1469static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1470{
1471        if (np->cork.opt) {
1472                kfree(np->cork.opt->dst0opt);
1473                kfree(np->cork.opt->dst1opt);
1474                kfree(np->cork.opt->hopopt);
1475                kfree(np->cork.opt->srcrt);
1476                kfree(np->cork.opt);
1477                np->cork.opt = NULL;
1478        }
1479
1480        if (inet->cork.base.dst) {
1481                dst_release(inet->cork.base.dst);
1482                inet->cork.base.dst = NULL;
1483                inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1484        }
1485        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1486}
1487
/* Transmit everything ip6_append_data() queued on sk->sk_write_queue
 * as one IPv6 datagram: chain the queued skbs onto the first skb's
 * frag_list, push the cached extension headers and the IPv6 header,
 * and send via ip6_local_out().  Cork state is released in all cases.
 *
 * Returns 0 on success or a negative errno.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Fold the remaining queued skbs into the first skb's frag_list,
	 * transferring their length/truesize accounting to it. */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	/* ipv6_push_nfrag_opts() may redirect final_dst (e.g. a routing
	 * header), so keep it separate from fl6->daddr. */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* NOTE(review): the _BH stats variants are used although
		 * this path normally runs in process context - confirm
		 * this is safe/intended. */
		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		/* Positive return values are congestion codes; map them
		 * to errnos, treating some as success. */
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1570
1571void ip6_flush_pending_frames(struct sock *sk)
1572{
1573        struct sk_buff *skb;
1574
1575        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1576                if (skb_dst(skb))
1577                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1578                                      IPSTATS_MIB_OUTDISCARDS);
1579                kfree_skb(skb);
1580        }
1581
1582        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1583}
1584EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1585