linux/net/ipv6/ip6_output.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      Changes:
 *      A.N.Kuznetsov   :       arithmetic in fragmentation.
 *                              extension headers are implemented.
 *                              route changes now work.
 *                              ip6_forward does not confuse sniffers.
 *                              etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *      Imran Patel     :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                      :       add ip6_append_data and related functions
 *                              for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

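/* Resolve the IPv6 next hop to a neighbour entry and hand the skb to the
 * neighbour layer for transmission, expanding headroom, looping multicast
 * back where required and honouring lwtunnel xmit redirects on the way.
 */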
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        unsigned int hh_len = LL_RESERVED_SPACE(dev);
        int delta = hh_len - skb_headroom(skb);
        const struct in6_addr *nexthop;
        struct neighbour *neigh;
        int ret;

        /* Be paranoid, rather than too clever. */
        if (unlikely(delta > 0) && dev->header_ops) {
                /* pskb_expand_head() might crash if skb is shared */
                if (skb_shared(skb)) {
                        struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);

                        if (likely(nskb)) {
                                if (skb->sk)
                                        skb_set_owner_w(nskb, skb->sk);
                                consume_skb(skb);
                        } else {
                                kfree_skb(skb);
                        }
                        skb = nskb;
                }
                if (skb &&
                    pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
                        kfree_skb(skb);
                        skb = NULL;
                }
                if (!skb) {
                        IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
                        return -ENOMEM;
                }
        }

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
                    ((mroute6_is_socket(net, skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                         * is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        net, sk, newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(net, idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

                if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
                    IPV6_ADDR_SCOPE_NODELOCAL &&
                    !(dev->flags & IFF_LOOPBACK)) {
                        kfree_skb(skb);
                        return 0;
                }
        }

        if (lwtunnel_xmit_redirect(dst->lwtstate)) {
                int res = lwtunnel_xmit(skb);

                if (res < 0 || res == LWTUNNEL_XMIT_DONE)
                        return res;
        }

        rcu_read_lock_bh();
        nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
        neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
        if (unlikely(!neigh))
                neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
        if (!IS_ERR(neigh)) {
                sock_confirm_neigh(skb, neigh);
                ret = neigh_output(neigh, skb, false);
                rcu_read_unlock_bh();
                return ret;
        }
        rcu_read_unlock_bh();

        IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

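/* Software-segment an oversized GSO packet and push every resulting
 * segment through ip6_fragment(); see ip_finish_output_gso for the cases
 * where a GSO segment can exceed the egress MTU.
 */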
static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
                                    struct sk_buff *skb, unsigned int mtu)
{
        struct sk_buff *segs, *nskb;
        netdev_features_t features;
        int ret = 0;

        /* Please see corresponding comment in ip_finish_output_gso
         * describing the cases where GSO segment length exceeds the
         * egress MTU.
         */
        features = netif_skb_features(skb);
        segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
        if (IS_ERR_OR_NULL(segs)) {
                kfree_skb(skb);
                return -ENOMEM;
        }

        consume_skb(skb);

        skb_list_walk_safe(segs, segs, nskb) {
                int err;

                skb_mark_not_on_list(segs);
                err = ip6_fragment(net, sk, segs, ip6_finish_output2);
                if (err && ret == 0)
                        ret = err;
        }

        return ret;
}

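/* Post-netfilter output step: restart output if an xfrm policy attached
 * a new dst after SNAT, otherwise fragment or transmit according to the
 * packet length and path MTU.
 */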
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
        /* Policy lookup after SNAT yielded a new policy */
        if (skb_dst(skb)->xfrm) {
                IPCB(skb)->flags |= IPSKB_REROUTED;
                return dst_output(net, sk, skb);
        }
#endif

        mtu = ip6_skb_dst_mtu(skb);
        if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
                return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

        if ((skb->len > mtu && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)) ||
            (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
                return ip6_fragment(net, sk, skb, ip6_finish_output2);
        else
                return ip6_finish_output2(net, sk, skb);
}

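/* Run the cgroup BPF egress program before the final output step; the
 * program may drop the packet or signal congestion (NET_XMIT_CN).
 */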
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        int ret;

        ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
        switch (ret) {
        case NET_XMIT_SUCCESS:
                return __ip6_finish_output(net, sk, skb);
        case NET_XMIT_CN:
                return __ip6_finish_output(net, sk, skb) ? : ret;
        default:
                kfree_skb(skb);
                return ret;
        }
}

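/**
 *      ip6_output - IPv6 packet output routine
 *      @net: network namespace
 *      @sk: socket which owns the packet, may be %NULL
 *      @skb: packet to send
 *
 *      dst output entry point for locally generated and forwarded packets;
 *      drops the packet if IPv6 is disabled on the egress device, otherwise
 *      runs the NF_INET_POST_ROUTING hook unless the packet was rerouted.
 */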
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                            net, sk, skb, indev, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
        if (!np->autoflowlabel_set)
                return ip6_default_np_autolabel(net);
        else
                return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note: the socket lock is not held for SYNACK packets, but the socket
 * might still be modified by calls to skb_set_owner_w() and
 * ipv6_local_error(), which use proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
        struct net *net = sock_net(sk);
        const struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        unsigned int head_room;
        struct ipv6hdr *hdr;
        u8  proto = fl6->flowi6_proto;
        int seg_len = skb->len;
        int hlimit = -1;
        u32 mtu;

        head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
        if (opt)
                head_room += opt->opt_nflen + opt->opt_flen;

        if (unlikely(skb_headroom(skb) < head_room)) {
                struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                if (!skb2) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                        kfree_skb(skb);
                        return -ENOBUFS;
                }
                if (skb->sk)
                        skb_set_owner_w(skb2, skb->sk);
                consume_skb(skb);
                skb = skb2;
        }

        if (opt) {
                seg_len += opt->opt_nflen + opt->opt_flen;

                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);

                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
                                             &fl6->saddr);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
                                ip6_autoflowlabel(net, np), fl6));

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        hdr->saddr = fl6->saddr;
        hdr->daddr = *first_hop;

        skb->protocol = htons(ETH_P_IPV6);
        skb->priority = priority;
        skb->mark = mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_OUT, skb->len);

                /* if egress device is enslaved to an L3 master device pass the
                 * skb to its handler for processing
                 */
                skb = l3mdev_ip6_out((struct sock *)sk, skb);
                if (unlikely(!skb))
                        return 0;

                /* hooks should never assume socket lock is held.
                 * we promote our socket to non const
                 */
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
                               net, (struct sock *)sk, skb, NULL, dst->dev,
                               dst_output);
        }

        skb->dev = dst->dev;
        /* ipv6_local_error() does not require socket lock,
         * we promote our socket to non const
         */
        ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

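/* Deliver a Router Alert packet to every raw socket registered for this
 * alert value; returns 1 if the packet was consumed by at least one
 * listener, 0 if the caller still owns it.
 */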
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        struct ipv6_pinfo *np = inet6_sk(sk);

                        if (np && np->rtalert_isolate &&
                            !net_eq(sock_net(sk), dev_net(skb->dev))) {
                                continue;
                        }
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

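/* Classify a packet destined to a proxied address: returns 1 if it must
 * be passed to the input path (unicast NDISC), 0 if it may be forwarded,
 * and -1 if it must be dropped after signalling a link failure.
 */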
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        __be16 frag_off;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* Unicast neighbour discovery messages destined
                         * to the proxied address must be passed to the
                         * input function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

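/* Final step of forwarding: update the forwarding counters, let
 * switchdev-offloaded packets go, clear the timestamp and hand the
 * packet to dst_output().
 */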
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
                                     struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
        if (skb->offload_l3_fwd_mark) {
                consume_skb(skb);
                return 0;
        }
#endif

        skb->tstamp = 0;
        return dst_output(net, sk, skb);
}

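/* True if the packet cannot be forwarded as-is: it exceeds the path MTU
 * and neither conntrack defrag (ignore_df), nor valid GSO segmentation,
 * nor the recorded frag_max_size permits it.
 */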
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
        if (skb->len <= mtu)
                return false;

        /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
        if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
                return true;

        if (skb->ignore_df)
                return false;

        if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
                return false;

        return true;
}

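/**
 *      ip6_forward - forward an IPv6 packet
 *      @skb: packet to forward
 *
 *      Validates the packet (forwarding enabled, hop limit, source address
 *      scope), handles Router Alert and proxy NDP cases, emits redirects
 *      and ICMPv6 errors where required, and on success passes the packet
 *      through the NF_INET_FORWARD hook to ip6_forward_finish().
 */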
int ip6_forward(struct sk_buff *skb)
{
        struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        if (unlikely(skb->sk))
                goto drop;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!net->ipv6.devconf_all->disable_policy &&
            !idev->cnf.disable_policy &&
            !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        skb_forward_csum(skb);

        /*
         *      We DO NOT do any processing on RA packets; we push them
         *      to user level AS IS, without any warranty that the
         *      application will be able to interpret them. The reason
         *      is that we cannot make anything clever here.
         *
         *      We are not an end node, so if the packet contains
         *      AH/ESP we cannot do anything. Defragmentation would also
         *      be a mistake; RA packets cannot be fragmented, because
         *      there is no warranty that different fragments will go
         *      along one path. --ANK
         */
        if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
                if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
                        return 0;
        }

        /*
         *      check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0) {
                        hdr->hop_limit--;
                        return ip6_input(skb);
                } else if (proxied < 0) {
                        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* IPv6 specs say nothing about it, but it is clear that we cannot
           send redirects to source routed frames.
           We don't send redirects to frames decapsulated from IPsec.
         */
        if (IP6CB(skb)->iif == dst->dev->ifindex &&
            opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct inet_peer *peer;
                struct rt6_info *rt;

                /*
                 *      incoming and outgoing devices are the same
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if (rt->rt6i_flags & RTF_GATEWAY)
                        target = &rt->rt6i_gateway;
                else
                        target = &hdr->daddr;

                peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

                /* Limit redirects both by destination (here)
                   and by source (inside ndisc_send_redirect)
                 */
                if (inet_peer_xrlim_allow(peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
                if (peer)
                        inet_putpeer(peer);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = ip6_dst_mtu_forward(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (ip6_pkt_too_big(skb, mtu)) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Mangling the hop limit is delayed until after the skb COW */

        hdr->hop_limit--;

        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
                       net, NULL, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

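/* Copy routing, scheduling, netfilter and security metadata from the
 * packet being fragmented to a freshly built fragment.
 */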
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

        skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
        skb_ext_copy(to, from);
        skb_copy_secmark(to, from);
}

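/**
 *      ip6_fraglist_init - initialise the fast-path fragment iterator
 *      @skb: packet whose frag_list members become the follow-up fragments
 *      @hlen: length of the unfragmentable header part
 *      @prevhdr: pointer to the Next Header field to rewrite
 *      @nexthdr: protocol number of the fragmentable part
 *      @frag_id: fragment identification to use
 *      @iter: iterator state to initialise
 *
 *      Detaches the frag_list, inserts a fragment header into @skb (which
 *      becomes the first fragment) and saves a copy of the per-fragment
 *      headers; ip6_fragment() below shows the intended iteration pattern
 *      with ip6_fraglist_prepare()/ip6_fraglist_next().
 */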
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
                      u8 nexthdr, __be32 frag_id,
                      struct ip6_fraglist_iter *iter)
{
        unsigned int first_len;
        struct frag_hdr *fh;

        /* BUILD HEADER */
        *prevhdr = NEXTHDR_FRAGMENT;
        iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
        if (!iter->tmp_hdr)
                return -ENOMEM;

        iter->frag = skb_shinfo(skb)->frag_list;
        skb_frag_list_init(skb);

        iter->offset = 0;
        iter->hlen = hlen;
        iter->frag_id = frag_id;
        iter->nexthdr = nexthdr;

        __skb_pull(skb, hlen);
        fh = __skb_push(skb, sizeof(struct frag_hdr));
        __skb_push(skb, hlen);
        skb_reset_network_header(skb);
        memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

        fh->nexthdr = nexthdr;
        fh->reserved = 0;
        fh->frag_off = htons(IP6_MF);
        fh->identification = frag_id;

        first_len = skb_pagelen(skb);
        skb->data_len = first_len - skb_headlen(skb);
        skb->len = first_len;
        ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

        return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

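/* Prepend the saved per-fragment headers and a fragment header to the
 * next fragment on the iterator, updating the offset and MF flag.
 */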
void ip6_fraglist_prepare(struct sk_buff *skb,
                          struct ip6_fraglist_iter *iter)
{
        struct sk_buff *frag = iter->frag;
        unsigned int hlen = iter->hlen;
        struct frag_hdr *fh;

        frag->ip_summed = CHECKSUM_NONE;
        skb_reset_transport_header(frag);
        fh = __skb_push(frag, sizeof(struct frag_hdr));
        __skb_push(frag, hlen);
        skb_reset_network_header(frag);
        memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
        iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
        fh->nexthdr = iter->nexthdr;
        fh->reserved = 0;
        fh->frag_off = htons(iter->offset);
        if (frag->next)
                fh->frag_off |= htons(IP6_MF);
        fh->identification = iter->frag_id;
        ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
        ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

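/* Initialise the slow-path fragmentation state: how much payload is left,
 * where copying starts and how much head/tail room each fragment needs.
 */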
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
                   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
                   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
        state->prevhdr = prevhdr;
        state->nexthdr = nexthdr;
        state->frag_id = frag_id;

        state->hlen = hlen;
        state->mtu = mtu;

        state->left = skb->len - hlen;  /* Space per frame */
        state->ptr = hlen;              /* Where to start from */

        state->hroom = hdr_room;
        state->troom = needed_tailroom;

        state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

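/* Allocate and build the next slow-path fragment: copy the unfragmentable
 * headers, insert a fragment header and copy the next chunk of payload.
 */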
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
        u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
        struct sk_buff *frag;
        struct frag_hdr *fh;
        unsigned int len;

        len = state->left;
        /* IF: it doesn't fit, use 'mtu' - the data space left */
        if (len > state->mtu)
                len = state->mtu;
        /* IF: we are not sending up to and including the packet end
           then align the next start on an eight byte boundary */
        if (len < state->left)
                len &= ~7;

        /* Allocate buffer */
        frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
                         state->hroom + state->troom, GFP_ATOMIC);
        if (!frag)
                return ERR_PTR(-ENOMEM);

        /*
         *      Set up data on packet
         */

        ip6_copy_metadata(frag, skb);
        skb_reserve(frag, state->hroom);
        skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
        skb_reset_network_header(frag);
        fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
        frag->transport_header = (frag->network_header + state->hlen +
                                  sizeof(struct frag_hdr));

        /*
         *      Charge the memory for the fragment to any owner
         *      it might possess
         */
        if (skb->sk)
                skb_set_owner_w(frag, skb->sk);

        /*
         *      Copy the packet header into the new buffer.
         */
        skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

        fragnexthdr_offset = skb_network_header(frag);
        fragnexthdr_offset += prevhdr - skb_network_header(skb);
        *fragnexthdr_offset = NEXTHDR_FRAGMENT;

        /*
         *      Build fragment header.
         */
        fh->nexthdr = state->nexthdr;
        fh->reserved = 0;
        fh->identification = state->frag_id;

        /*
         *      Copy a block of the IP datagram.
         */
        BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
                             len));
        state->left -= len;

        fh->frag_off = htons(state->offset);
        if (state->left > 0)
                fh->frag_off |= htons(IP6_MF);
        ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

        state->ptr += len;
        state->offset += len;

        return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

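/**
 *      ip6_fragment - fragment an IPv6 packet to fit the path MTU
 *      @net: network namespace
 *      @sk: socket sending the packet, may be %NULL
 *      @skb: packet to fragment
 *      @output: function used to transmit each fragment
 *
 *      Uses the fast path when the skb already carries suitably sized
 *      fragments on its frag_list, and falls back to the copying slow
 *      path otherwise.
 */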
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
                                inet6_sk(skb->sk) : NULL;
        struct ip6_frag_state state;
        unsigned int mtu, hlen, nexthdr_offset;
        ktime_t tstamp = skb->tstamp;
        int hroom, err = 0;
        __be32 frag_id;
        u8 *prevhdr, nexthdr = 0;

        err = ip6_find_1stfragopt(skb, &prevhdr);
        if (err < 0)
                goto fail;
        hlen = err;
        nexthdr = *prevhdr;
        nexthdr_offset = prevhdr - skb_network_header(skb);

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb is not generated by a local socket.
         */
        if (unlikely(!skb->ignore_df && skb->len > mtu))
                goto fail_toobig;

        if (IP6CB(skb)->frag_max_size) {
                if (IP6CB(skb)->frag_max_size > mtu)
                        goto fail_toobig;

                /* don't send fragments larger than what we received */
                mtu = IP6CB(skb)->frag_max_size;
                if (mtu < IPV6_MIN_MTU)
                        mtu = IPV6_MIN_MTU;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < hlen + sizeof(struct frag_hdr) + 8)
                goto fail_toobig;
        mtu -= hlen + sizeof(struct frag_hdr);

        frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
                                    &ipv6_hdr(skb)->saddr);

        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_checksum_help(skb)))
                goto fail;

        prevhdr = skb_network_header(skb) + nexthdr_offset;
        hroom = LL_RESERVED_SPACE(rt->dst.dev);
        if (skb_has_frag_list(skb)) {
                unsigned int first_len = skb_pagelen(skb);
                struct ip6_fraglist_iter iter;
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb) ||
                    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
                                        &iter);
                if (err < 0)
                        goto fail;

                for (;;) {
                        /* Prepare the header of the next frame,
                         * before the previous one goes down. */
                        if (iter.frag)
                                ip6_fraglist_prepare(skb, &iter);

                        skb->tstamp = tstamp;
                        err = output(net, sk, skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !iter.frag)
                                break;

                        skb = ip6_fraglist_next(&iter);
                }

                kfree(iter.tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        return 0;
                }

                kfree_skb_list(iter.frag);

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        /*
         *      Fragment the datagram.
         */

        ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
                      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
                      &state);

        /*
         *      Keep copying data until we run out.
         */

        while (state.left > 0) {
                frag = ip6_frag_next(skb, &state);
                if (IS_ERR(frag)) {
                        err = PTR_ERR(frag);
                        goto fail;
                }

                /*
                 *      Put this fragment into the sending queue.
                 */
                frag->tstamp = tstamp;
                err = output(net, sk, frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        consume_skb(skb);
        return err;

fail_toobig:
        if (skb->sk && dst_allfrag(skb_dst(skb)))
                sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        err = -EMSGSIZE;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

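/* True if the cached route can no longer be trusted for @fl_addr: it is
 * neither an exact host route for that address nor the last destination
 * cached on the socket.
 */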
static inline int ip6_rt_check(const struct rt6key *rt_key,
                               const struct in6_addr *fl_addr,
                               const struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

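/* Validate a socket-cached dst against the flow; release it and return
 * NULL when it no longer matches (see the --ANK comment below).
 */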
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          const struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt;

        if (!dst)
                goto out;

        if (dst->ops->family != AF_INET6) {
                dst_release(dst);
                return NULL;
        }

        rt = (struct rt6_info *)dst;
        /* Yes, checking route validity in the non-connected
         * case is not very simple. Take into account
         * that we do not support routing by source, TOS,
         * and MSG_DONTROUTE            --ANK (980726)
         *
         * 1. ip6_rt_check(): If the route was a host route,
         *    check that the cached destination is current.
         *    If it is a network route, we still may
         *    check its validity using a saved pointer
         *    to the last used address: daddr_cache.
         *    We do not want to save the whole address now,
         *    (because the main consumer of this service
         *    is tcp, which does not have this problem),
         *    so the last trick works only on connected
         *    sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
           (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
              (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

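/* Common tail of the dst lookup helpers: resolve a route for @fl6,
 * selecting a source address first when none was given and, with
 * optimistic DAD, falling back to the default router's dst entry.
 */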
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        struct neighbour *n;
        struct rt6_info *rt;
#endif
        int err;
        int flags = 0;

        /* The correct way to handle this would be to do
         * ip6_route_get_saddr, and then ip6_route_output; however,
         * the route-specific preferred source forces the
         * ip6_route_output call _before_ ip6_route_get_saddr.
         *
         * In source specific routing (no src=any default route),
         * ip6_route_output will fail given src=any saddr, though, so
         * that's why we try it again later.
         */
        if (ipv6_addr_any(&fl6->saddr)) {
                struct fib6_info *from;
                struct rt6_info *rt;

                *dst = ip6_route_output(net, sk, fl6);
                rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

                rcu_read_lock();
                from = rt ? rcu_dereference(rt->from) : NULL;
                err = ip6_route_get_saddr(net, from, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                rcu_read_unlock();

                if (err)
                        goto out_err_release;

                /* If we had an erroneous initial result, pretend it
                 * never existed and let the SA-enabled version take
                 * over.
                 */
                if ((*dst)->error) {
                        dst_release(*dst);
                        *dst = NULL;
                }

                if (fl6->flowi6_oif)
                        flags |= RT6_LOOKUP_F_IFACE;
        }

        if (!*dst)
                *dst = ip6_route_output_flags(net, sk, fl6, flags);

        err = (*dst)->error;
        if (err)
                goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * If the dst entry we've looked up has a neighbour entry that
         * is in the INCOMPLETE state and the src address from the flow
         * is marked as OPTIMISTIC, we release the found dst entry and
         * replace it with the dst entry of the nexthop router.
         */
        rt = (struct rt6_info *) *dst;
        rcu_read_lock_bh();
        n = __ipv6_neigh_lookup_noref(rt->dst.dev,
                                      rt6_nexthop(rt, &fl6->daddr));
        err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
        rcu_read_unlock_bh();

        if (err) {
                struct inet6_ifaddr *ifp;
                struct flowi6 fl_gw6;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw6);
                        err = (*dst)->error;
                        if (err)
                                goto out_err_release;
                }
        }
#endif
        if (ipv6_addr_v4mapped(&fl6->saddr) &&
            !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
                err = -EAFNOSUPPORT;
                goto out_err_release;
        }

        return 0;

out_err_release:
        dst_release(*dst);
        *dst = NULL;

        if (err == -ENETUNREACH)
                IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        return err;
}

/**
 *      ip6_dst_lookup - perform route lookup on flow
 *      @net: Network namespace to perform lookup in
 *      @sk: socket which provides route info
 *      @dst: pointer to dst_entry * for result
 *      @fl6: flow to lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
                   struct flowi6 *fl6)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *      @net: Network namespace to perform lookup in
 *      @sk: socket which provides route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst)
{
        struct dst_entry *dst = NULL;
        int err;

        err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;

        return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *      @sk: socket which provides the dst cache and route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @connected: whether @sk is connected or not
 *
 *      This function performs a route lookup on the given flow with the
 *      possibility of using the cached route in the socket if it is valid.
 *      It will take the socket dst lock when operating on the dst cache.
 *      As a result, this function can only be used in process context.
 *
 *      In addition, for a connected socket, cache the dst in the socket
 *      if the current cache is not valid.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                         const struct in6_addr *final_dst,
                                         bool connected)
{
        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

        dst = ip6_sk_dst_check(sk, dst, fl6);
        if (dst)
                return dst;

        dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
        if (connected && !IS_ERR(dst))
                ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

        return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

/**
 *      ip6_dst_lookup_tunnel - perform route lookup on tunnel
 *      @skb: Packet for which lookup is done
 *      @dev: Tunnel device
 *      @net: Network namespace of tunnel device
 *      @sock: Socket which provides route info
 *      @saddr: Memory to store the src ip address
 *      @info: Tunnel information
 *      @protocol: IP protocol
 *      @use_cache: Flag to enable cache usage
 *
 *      This function performs a route lookup on a tunnel.
 *
 *      It returns a valid dst pointer and stores the src address to be
 *      used in the tunnel in param saddr on success, else a pointer
 *      encoded error code.
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
                                        struct net_device *dev,
                                        struct net *net,
                                        struct socket *sock,
                                        struct in6_addr *saddr,
                                        const struct ip_tunnel_info *info,
                                        u8 protocol,
                                        bool use_cache)
{
        struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
        struct dst_cache *dst_cache;
#endif
        struct flowi6 fl6;
        __u8 prio;

#ifdef CONFIG_DST_CACHE
        dst_cache = (struct dst_cache *)&info->dst_cache;
        if (use_cache) {
                dst = dst_cache_get_ip6(dst_cache, saddr);
                if (dst)
                        return dst;
        }
#endif
        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_mark = skb->mark;
        fl6.flowi6_proto = protocol;
        fl6.daddr = info->key.u.ipv6.dst;
        fl6.saddr = info->key.u.ipv6.src;
        prio = info->key.tos;
        fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
                                          info->key.label);

        dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
                                              NULL);
        if (IS_ERR(dst)) {
                netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
                return ERR_PTR(-ENETUNREACH);
        }
        if (dst->dev == dev) { /* is this necessary? */
                netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
                dst_release(dst);
                return ERR_PTR(-ELOOP);
        }
#ifdef CONFIG_DST_CACHE
        if (use_cache)
                dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
        *saddr = fl6.saddr;
        return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);

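/* Duplicate an extension header block for the cork; hdrlen counts 8-octet
 * units beyond the first 8 octets, hence (hdrlen + 1) * 8 bytes total.
 */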
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

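/* Recompute mtu/maxfraglen while appending data: only the first fragment
 * has to reserve the dst header_len, unless an xfrm tunnel is in use.
 */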
static void ip6_append_data_mtu(unsigned int *mtu,
                                int *maxfraglen,
                                unsigned int fragheaderlen,
                                struct sk_buff *skb,
                                struct rt6_info *rt,
                                unsigned int orig_mtu)
{
        if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
                if (!skb) {
                        /* first fragment, reserve header_len */
                        *mtu = orig_mtu - rt->dst.header_len;

                } else {
                        /*
                         * this fragment is not the first; the header
                         * space is regarded as data space.
                         */
                        *mtu = orig_mtu;
                }
                *maxfraglen = ((*mtu - fragheaderlen) & ~7)
                              + fragheaderlen - sizeof(struct frag_hdr);
        }
}

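/* Set up per-cork state for ip6_append_data(): duplicate the tx options,
 * pin the route and derive the fragment size from the path MTU.
 */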
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
                          struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
                          struct rt6_info *rt, struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        unsigned int mtu;
        struct ipv6_txoptions *opt = ipc6->opt;

        /*
         * setup for corking
         */
        if (opt) {
                if (WARN_ON(v6_cork->opt))
                        return -EINVAL;

                v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
                if (unlikely(!v6_cork->opt))
                        return -ENOBUFS;

                v6_cork->opt->tot_len = sizeof(*opt);
                v6_cork->opt->opt_flen = opt->opt_flen;
                v6_cork->opt->opt_nflen = opt->opt_nflen;

                v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                    sk->sk_allocation);
                if (opt->dst0opt && !v6_cork->opt->dst0opt)
                        return -ENOBUFS;

                v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                    sk->sk_allocation);
                if (opt->dst1opt && !v6_cork->opt->dst1opt)
                        return -ENOBUFS;

                v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                   sk->sk_allocation);
                if (opt->hopopt && !v6_cork->opt->hopopt)
                        return -ENOBUFS;

                v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                    sk->sk_allocation);
                if (opt->srcrt && !v6_cork->opt->srcrt)
                        return -ENOBUFS;

                /* need source address above miyazawa */
        }
        dst_hold(&rt->dst);
        cork->base.dst = &rt->dst;
        cork->fl.u.ip6 = *fl6;
        v6_cork->hop_limit = ipc6->hlimit;
        v6_cork->tclass = ipc6->tclass;
        if (rt->dst.flags & DST_XFRM_TUNNEL)
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
        else
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                        READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
        if (np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < IPV6_MIN_MTU)
                return -EINVAL;
        cork->base.fragsize = mtu;
        cork->base.gso_size = ipc6->gso_size;
        cork->base.tx_flags = 0;
        cork->base.mark = ipc6->sockc.mark;
        sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

        if (dst_allfrag(xfrm_dst_path(&rt->dst)))
                cork->base.flags |= IPCORK_ALLFRAG;
        cork->base.length = 0;

        cork->base.transmit_time = ipc6->sockc.transmit_time;

        return 0;
}

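/* Workhorse behind ip6_append_data() and ip6_make_skb(): append @length
 * bytes from @from to @queue, sizing each skb so that every fragment but
 * the last fills the path MTU.
 */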
1451static int __ip6_append_data(struct sock *sk,
1452                             struct flowi6 *fl6,
1453                             struct sk_buff_head *queue,
1454                             struct inet_cork *cork,
1455                             struct inet6_cork *v6_cork,
1456                             struct page_frag *pfrag,
1457                             int getfrag(void *from, char *to, int offset,
1458                                         int len, int odd, struct sk_buff *skb),
1459                             void *from, int length, int transhdrlen,
1460                             unsigned int flags, struct ipcm6_cookie *ipc6)
1461{
1462        struct sk_buff *skb, *skb_prev = NULL;
1463        unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1464        struct ubuf_info *uarg = NULL;
1465        int exthdrlen = 0;
1466        int dst_exthdrlen = 0;
1467        int hh_len;
1468        int copy;
1469        int err;
1470        int offset = 0;
1471        u32 tskey = 0;
1472        struct rt6_info *rt = (struct rt6_info *)cork->dst;
1473        struct ipv6_txoptions *opt = v6_cork->opt;
1474        int csummode = CHECKSUM_NONE;
1475        unsigned int maxnonfragsize, headersize;
1476        unsigned int wmem_alloc_delta = 0;
1477        bool paged, extra_uref = false;
1478
1479        skb = skb_peek_tail(queue);
1480        if (!skb) {
1481                exthdrlen = opt ? opt->opt_flen : 0;
1482                dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1483        }
1484
1485        paged = !!cork->gso_size;
1486        mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1487        orig_mtu = mtu;
1488
1489        if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1490            sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1491                tskey = sk->sk_tskey++;
1492
1493        hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1494
1495        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1496                        (opt ? opt->opt_nflen : 0);
1497        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1498                     sizeof(struct frag_hdr);
1499
1500        headersize = sizeof(struct ipv6hdr) +
1501                     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1502                     (dst_allfrag(&rt->dst) ?
1503                      sizeof(struct frag_hdr) : 0) +
1504                     rt->rt6i_nfheader_len;
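        /* A worked example: with a 1500 byte MTU and no extension headers,
         * fragheaderlen = 40 (the bare IPv6 header), so
         * maxfraglen = ((1500 - 40) & ~7) + 40 - 8 = 1488.  Re-adding the
         * 8 byte fragment header gives 1496 byte packets whose 1448 byte
         * fragmentable part is a multiple of 8, as non-final fragments
         * require; headersize is likewise just 40 in this case.
         */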
1505
1506        /* As per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1507         * in the first fragment.
1508         */
1509        if (headersize + transhdrlen > mtu)
1510                goto emsgsize;
1511
1512        if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1513            (sk->sk_protocol == IPPROTO_UDP ||
1514             sk->sk_protocol == IPPROTO_RAW)) {
1515                ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1516                                sizeof(struct ipv6hdr));
1517                goto emsgsize;
1518        }
1519
1520        if (ip6_sk_ignore_df(sk))
1521                maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1522        else
1523                maxnonfragsize = mtu;
1524
1525        if (cork->length + length > maxnonfragsize - headersize) {
1526emsgsize:
1527                pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1528                ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1529                return -EMSGSIZE;
1530        }
1531
1532        /* CHECKSUM_PARTIAL only with no extension headers and when
1533         * we are not going to fragment
1534         */
1535        if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1536            headersize == sizeof(struct ipv6hdr) &&
1537            length <= mtu - headersize &&
1538            (!(flags & MSG_MORE) || cork->gso_size) &&
1539            rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1540                csummode = CHECKSUM_PARTIAL;
1541
1542        if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1543                uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
1544                if (!uarg)
1545                        return -ENOBUFS;
1546                extra_uref = !skb_zcopy(skb);   /* only ref on new uarg */
1547                if (rt->dst.dev->features & NETIF_F_SG &&
1548                    csummode == CHECKSUM_PARTIAL) {
1549                        paged = true;
1550                } else {
1551                        uarg->zerocopy = 0;
1552                        skb_zcopy_set(skb, uarg, &extra_uref);
1553                }
1554        }
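        /* When the device cannot combine scatter-gather with checksum
         * offload, zerocopy is disabled on the uarg and the data is
         * copied after all; the uarg stays attached so userspace still
         * receives a (copied) completion notification.
         */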
1555
1556        /*
1557         * Let's try using as much space as possible.
1558         * Use MTU if total length of the message fits into the MTU.
1559         * Otherwise, we need to reserve fragment header and
1560         * fragment alignment (= 8-15 octets, in total).
1561         *
1562         * Note that we may need to "move" the data from the tail
1563         * of the buffer to the new fragment when we split
1564         * the message.
1565         *
1566         * FIXME: The message may need to be fragmented into multiple
1567         *        chunks at once if the non-fragmentable extension
1568         *        headers are too large.
1569         * --yoshfuji
1570         */
1571
1572        cork->length += length;
1573        if (!skb)
1574                goto alloc_new_skb;
1575
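        /* Each pass of the loop below either tops up the tail skb of the
         * queue or, once it is full (copy <= 0), allocates a new
         * fragment-sized skb on the alloc_new_skb path.
         */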
1576        while (length > 0) {
1577                /* Check if the remaining data fits into current packet. */
1578                copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1579                if (copy < length)
1580                        copy = maxfraglen - skb->len;
1581
1582                if (copy <= 0) {
1583                        char *data;
1584                        unsigned int datalen;
1585                        unsigned int fraglen;
1586                        unsigned int fraggap;
1587                        unsigned int alloclen, alloc_extra;
1588                        unsigned int pagedlen;
1589alloc_new_skb:
1590                        /* There's no room in the current skb */
1591                        if (skb)
1592                                fraggap = skb->len - maxfraglen;
1593                        else
1594                                fraggap = 0;
1595                        /* update mtu and maxfraglen if necessary */
1596                        if (!skb || !skb_prev)
1597                                ip6_append_data_mtu(&mtu, &maxfraglen,
1598                                                    fragheaderlen, skb, rt,
1599                                                    orig_mtu);
1600
1601                        skb_prev = skb;
1602
1603                        /*
1604                         * If remaining data exceeds the mtu,
1605                         * we know we need more fragment(s).
1606                         */
1607                        datalen = length + fraggap;
1608
1609                        if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1610                                datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1611                        fraglen = datalen + fragheaderlen;
1612                        pagedlen = 0;
1613
1614                        alloc_extra = hh_len;
1615                        alloc_extra += dst_exthdrlen;
1616                        alloc_extra += rt->dst.trailer_len;
1617
1618                        /* We just reserve space for the fragment header.
1619                         * Note: this may be an over-allocation if the message
1620                         * (without MSG_MORE) fits into the MTU.
1621                         */
1622                        alloc_extra += sizeof(struct frag_hdr);
1623
1624                        if ((flags & MSG_MORE) &&
1625                            !(rt->dst.dev->features&NETIF_F_SG))
1626                                alloclen = mtu;
1627                        else if (!paged &&
1628                                 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
1629                                  !(rt->dst.dev->features & NETIF_F_SG)))
1630                                alloclen = fraglen;
1631                        else {
1632                                alloclen = min_t(int, fraglen, MAX_HEADER);
1633                                pagedlen = fraglen - alloclen;
1634                        }
1635                        alloclen += alloc_extra;
1636
1637                        if (datalen != length + fraggap) {
1638                                /*
1639                                 * This is not the last fragment; the trailer
1640                                 * space is regarded as data space.
1641                                 */
1642                                datalen += rt->dst.trailer_len;
1643                        }
1644
1645                        fraglen = datalen + fragheaderlen;
1646
1647                        copy = datalen - transhdrlen - fraggap - pagedlen;
1648                        if (copy < 0) {
1649                                err = -EINVAL;
1650                                goto error;
1651                        }
1652                        if (transhdrlen) {
1653                                skb = sock_alloc_send_skb(sk, alloclen,
1654                                                (flags & MSG_DONTWAIT), &err);
1655                        } else {
1656                                skb = NULL;
1657                                if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1658                                    2 * sk->sk_sndbuf)
1659                                        skb = alloc_skb(alloclen,
1660                                                        sk->sk_allocation);
1661                                if (unlikely(!skb))
1662                                        err = -ENOBUFS;
1663                        }
1664                        if (!skb)
1665                                goto error;
1666                        /*
1667                         *      Fill in the control structures
1668                         */
1669                        skb->protocol = htons(ETH_P_IPV6);
1670                        skb->ip_summed = csummode;
1671                        skb->csum = 0;
1672                        /* reserve room for the fragment header and IPsec headers */
1673                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1674                                    dst_exthdrlen);
1675
1676                        /*
1677                         *      Find where to start putting bytes
1678                         */
1679                        data = skb_put(skb, fraglen - pagedlen);
1680                        skb_set_network_header(skb, exthdrlen);
1681                        data += fragheaderlen;
1682                        skb->transport_header = (skb->network_header +
1683                                                 fragheaderlen);
1684                        if (fraggap) {
1685                                skb->csum = skb_copy_and_csum_bits(
1686                                        skb_prev, maxfraglen,
1687                                        data + transhdrlen, fraggap);
1688                                skb_prev->csum = csum_sub(skb_prev->csum,
1689                                                          skb->csum);
1690                                data += fraggap;
1691                                pskb_trim_unique(skb_prev, maxfraglen);
1692                        }
1693                        if (copy > 0 &&
1694                            getfrag(from, data + transhdrlen, offset,
1695                                    copy, fraggap, skb) < 0) {
1696                                err = -EFAULT;
1697                                kfree_skb(skb);
1698                                goto error;
1699                        }
1700
1701                        offset += copy;
1702                        length -= copy + transhdrlen;
1703                        transhdrlen = 0;
1704                        exthdrlen = 0;
1705                        dst_exthdrlen = 0;
1706
1707                        /* Only the initial fragment is time stamped */
1708                        skb_shinfo(skb)->tx_flags = cork->tx_flags;
1709                        cork->tx_flags = 0;
1710                        skb_shinfo(skb)->tskey = tskey;
1711                        tskey = 0;
1712                        skb_zcopy_set(skb, uarg, &extra_uref);
1713
1714                        if ((flags & MSG_CONFIRM) && !skb_prev)
1715                                skb_set_dst_pending_confirm(skb, 1);
1716
1717                        /*
1718                         * Put the packet on the pending queue
1719                         */
1720                        if (!skb->destructor) {
1721                                skb->destructor = sock_wfree;
1722                                skb->sk = sk;
1723                                wmem_alloc_delta += skb->truesize;
1724                        }
1725                        __skb_queue_tail(queue, skb);
1726                        continue;
1727                }
1728
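                /* From here on the data lands in the current tail skb by
                 * one of three strategies: a direct copy into linear
                 * tailroom (no NETIF_F_SG), coalescing into the socket's
                 * page frags, or zerocopy mapping of the user pages.
                 */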
1729                if (copy > length)
1730                        copy = length;
1731
1732                if (!(rt->dst.dev->features&NETIF_F_SG) &&
1733                    skb_tailroom(skb) >= copy) {
1734                        unsigned int off;
1735
1736                        off = skb->len;
1737                        if (getfrag(from, skb_put(skb, copy),
1738                                                offset, copy, off, skb) < 0) {
1739                                __skb_trim(skb, off);
1740                                err = -EFAULT;
1741                                goto error;
1742                        }
1743                } else if (!uarg || !uarg->zerocopy) {
1744                        int i = skb_shinfo(skb)->nr_frags;
1745
1746                        err = -ENOMEM;
1747                        if (!sk_page_frag_refill(sk, pfrag))
1748                                goto error;
1749
1750                        if (!skb_can_coalesce(skb, i, pfrag->page,
1751                                              pfrag->offset)) {
1752                                err = -EMSGSIZE;
1753                                if (i == MAX_SKB_FRAGS)
1754                                        goto error;
1755
1756                                __skb_fill_page_desc(skb, i, pfrag->page,
1757                                                     pfrag->offset, 0);
1758                                skb_shinfo(skb)->nr_frags = ++i;
1759                                get_page(pfrag->page);
1760                        }
1761                        copy = min_t(int, copy, pfrag->size - pfrag->offset);
1762                        if (getfrag(from,
1763                                    page_address(pfrag->page) + pfrag->offset,
1764                                    offset, copy, skb->len, skb) < 0)
1765                                goto error_efault;
1766
1767                        pfrag->offset += copy;
1768                        skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1769                        skb->len += copy;
1770                        skb->data_len += copy;
1771                        skb->truesize += copy;
1772                        wmem_alloc_delta += copy;
1773                } else {
1774                        err = skb_zerocopy_iter_dgram(skb, from, copy);
1775                        if (err < 0)
1776                                goto error;
1777                }
1778                offset += copy;
1779                length -= copy;
1780        }
1781
1782        if (wmem_alloc_delta)
1783                refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1784        return 0;
1785
1786error_efault:
1787        err = -EFAULT;
1788error:
1789        net_zcopy_put_abort(uarg, extra_uref);
1790        cork->length -= length;
1791        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1792        refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1793        return err;
1794}
1795
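/* Append data to the pending queue, sk->sk_write_queue.  The first call
 * sets up the cork from @ipc6/@rt/@fl6; while the socket stays corked,
 * later calls reuse the flow saved in the cork.  MSG_PROBE means the
 * caller only wanted the path probed, so nothing is queued.
 */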
1796int ip6_append_data(struct sock *sk,
1797                    int getfrag(void *from, char *to, int offset, int len,
1798                                int odd, struct sk_buff *skb),
1799                    void *from, int length, int transhdrlen,
1800                    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1801                    struct rt6_info *rt, unsigned int flags)
1802{
1803        struct inet_sock *inet = inet_sk(sk);
1804        struct ipv6_pinfo *np = inet6_sk(sk);
1805        int exthdrlen;
1806        int err;
1807
1808        if (flags&MSG_PROBE)
1809                return 0;
1810        if (skb_queue_empty(&sk->sk_write_queue)) {
1811                /*
1812                 * Set up for corking.
1813                 */
1814                err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1815                                     ipc6, rt, fl6);
1816                if (err)
1817                        return err;
1818
1819                exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1820                length += exthdrlen;
1821                transhdrlen += exthdrlen;
1822        } else {
1823                fl6 = &inet->cork.fl.u.ip6;
1824                transhdrlen = 0;
1825        }
1826
1827        return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1828                                 &np->cork, sk_page_frag(sk), getfrag,
1829                                 from, length, transhdrlen, flags, ipc6);
1830}
1831EXPORT_SYMBOL_GPL(ip6_append_data);
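
/* A usage sketch (not from this file): a datagram sender such as
 * udpv6_sendmsg() typically drives the corking API roughly as follows,
 * with ip_generic_getfrag() copying from the msghdr iterator:
 *
 *      err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen,
 *                            sizeof(struct udphdr), &ipc6, fl6,
 *                            (struct rt6_info *)dst, msg->msg_flags);
 *      if (err)
 *              ip6_flush_pending_frames(sk);
 *      else if (!(msg->msg_flags & MSG_MORE))
 *              err = ip6_push_pending_frames(sk);
 */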
1832
1833static void ip6_cork_release(struct inet_cork_full *cork,
1834                             struct inet6_cork *v6_cork)
1835{
1836        if (v6_cork->opt) {
1837                kfree(v6_cork->opt->dst0opt);
1838                kfree(v6_cork->opt->dst1opt);
1839                kfree(v6_cork->opt->hopopt);
1840                kfree(v6_cork->opt->srcrt);
1841                kfree(v6_cork->opt);
1842                v6_cork->opt = NULL;
1843        }
1844
1845        if (cork->base.dst) {
1846                dst_release(cork->base.dst);
1847                cork->base.dst = NULL;
1848                cork->base.flags &= ~IPCORK_ALLFRAG;
1849        }
1850        memset(&cork->fl, 0, sizeof(cork->fl));
1851}
1852
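/* Collapse the queued fragments into one skb: extra fragments are chained
 * on the head skb's frag_list, the stored extension headers and the IPv6
 * header are prepended, and priority, mark and transmit time are stamped
 * from the cork before it is released.
 */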
1853struct sk_buff *__ip6_make_skb(struct sock *sk,
1854                               struct sk_buff_head *queue,
1855                               struct inet_cork_full *cork,
1856                               struct inet6_cork *v6_cork)
1857{
1858        struct sk_buff *skb, *tmp_skb;
1859        struct sk_buff **tail_skb;
1860        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1861        struct ipv6_pinfo *np = inet6_sk(sk);
1862        struct net *net = sock_net(sk);
1863        struct ipv6hdr *hdr;
1864        struct ipv6_txoptions *opt = v6_cork->opt;
1865        struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1866        struct flowi6 *fl6 = &cork->fl.u.ip6;
1867        unsigned char proto = fl6->flowi6_proto;
1868
1869        skb = __skb_dequeue(queue);
1870        if (!skb)
1871                goto out;
1872        tail_skb = &(skb_shinfo(skb)->frag_list);
1873
1874        /* Move skb->data up from the extension-header space to the IP header. */
1875        if (skb->data < skb_network_header(skb))
1876                __skb_pull(skb, skb_network_offset(skb));
1877        while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1878                __skb_pull(tmp_skb, skb_network_header_len(skb));
1879                *tail_skb = tmp_skb;
1880                tail_skb = &(tmp_skb->next);
1881                skb->len += tmp_skb->len;
1882                skb->data_len += tmp_skb->len;
1883                skb->truesize += tmp_skb->truesize;
1884                tmp_skb->destructor = NULL;
1885                tmp_skb->sk = NULL;
1886        }
1887
1888        /* Allow local fragmentation. */
1889        skb->ignore_df = ip6_sk_ignore_df(sk);
1890
1891        *final_dst = fl6->daddr;
1892        __skb_pull(skb, skb_network_header_len(skb));
1893        if (opt && opt->opt_flen)
1894                ipv6_push_frag_opts(skb, opt, &proto);
1895        if (opt && opt->opt_nflen)
1896                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1897
1898        skb_push(skb, sizeof(struct ipv6hdr));
1899        skb_reset_network_header(skb);
1900        hdr = ipv6_hdr(skb);
1901
1902        ip6_flow_hdr(hdr, v6_cork->tclass,
1903                     ip6_make_flowlabel(net, skb, fl6->flowlabel,
1904                                        ip6_autoflowlabel(net, np), fl6));
1905        hdr->hop_limit = v6_cork->hop_limit;
1906        hdr->nexthdr = proto;
1907        hdr->saddr = fl6->saddr;
1908        hdr->daddr = *final_dst;
1909
1910        skb->priority = sk->sk_priority;
1911        skb->mark = cork->base.mark;
1912
1913        skb->tstamp = cork->base.transmit_time;
1914
1915        skb_dst_set(skb, dst_clone(&rt->dst));
1916        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1917        if (proto == IPPROTO_ICMPV6) {
1918                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1919
1920                ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1921                ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1922        }
1923
1924        ip6_cork_release(cork, v6_cork);
1925out:
1926        return skb;
1927}
1928
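/* Positive return values from ip6_local_out() are NET_XMIT_* congestion
 * codes; net_xmit_errno() maps them so that NET_XMIT_CN is not reported
 * to the caller as an error.
 */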
1929int ip6_send_skb(struct sk_buff *skb)
1930{
1931        struct net *net = sock_net(skb->sk);
1932        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1933        int err;
1934
1935        err = ip6_local_out(net, skb->sk, skb);
1936        if (err) {
1937                if (err > 0)
1938                        err = net_xmit_errno(err);
1939                if (err)
1940                        IP6_INC_STATS(net, rt->rt6i_idev,
1941                                      IPSTATS_MIB_OUTDISCARDS);
1942        }
1943
1944        return err;
1945}
1946
1947int ip6_push_pending_frames(struct sock *sk)
1948{
1949        struct sk_buff *skb;
1950
1951        skb = ip6_finish_skb(sk);
1952        if (!skb)
1953                return 0;
1954
1955        return ip6_send_skb(skb);
1956}
1957EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
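
/* ip6_finish_skb() is, roughly, a small inline (in include/net/ipv6.h)
 * that runs __ip6_make_skb() on sk->sk_write_queue with the socket's
 * own cork state.
 */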
1958
1959static void __ip6_flush_pending_frames(struct sock *sk,
1960                                       struct sk_buff_head *queue,
1961                                       struct inet_cork_full *cork,
1962                                       struct inet6_cork *v6_cork)
1963{
1964        struct sk_buff *skb;
1965
1966        while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1967                if (skb_dst(skb))
1968                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1969                                      IPSTATS_MIB_OUTDISCARDS);
1970                kfree_skb(skb);
1971        }
1972
1973        ip6_cork_release(cork, v6_cork);
1974}
1975
1976void ip6_flush_pending_frames(struct sock *sk)
1977{
1978        __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1979                                   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1980}
1981EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1982
1983struct sk_buff *ip6_make_skb(struct sock *sk,
1984                             int getfrag(void *from, char *to, int offset,
1985                                         int len, int odd, struct sk_buff *skb),
1986                             void *from, int length, int transhdrlen,
1987                             struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1988                             struct rt6_info *rt, unsigned int flags,
1989                             struct inet_cork_full *cork)
1990{
1991        struct inet6_cork v6_cork;
1992        struct sk_buff_head queue;
1993        int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1994        int err;
1995
1996        if (flags & MSG_PROBE)
1997                return NULL;
1998
1999        __skb_queue_head_init(&queue);
2000
2001        cork->base.flags = 0;
2002        cork->base.addr = 0;
2003        cork->base.opt = NULL;
2004        cork->base.dst = NULL;
2005        v6_cork.opt = NULL;
2006        err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
2007        if (err) {
2008                ip6_cork_release(cork, &v6_cork);
2009                return ERR_PTR(err);
2010        }
2011        if (ipc6->dontfrag < 0)
2012                ipc6->dontfrag = inet6_sk(sk)->dontfrag;
2013
2014        err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
2015                                &current->task_frag, getfrag, from,
2016                                length + exthdrlen, transhdrlen + exthdrlen,
2017                                flags, ipc6);
2018        if (err) {
2019                __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
2020                return ERR_PTR(err);
2021        }
2022
2023        return __ip6_make_skb(sk, &queue, cork, &v6_cork);
2024}
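
/* ip6_make_skb() is the uncorked single-shot path: the datagram is built
 * on a private queue with caller-provided cork state and returned as a
 * finished skb (or ERR_PTR), leaving sk->sk_write_queue untouched.
 * UDP, for example, uses this when the socket is not corked.
 */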
2025