linux/net/ipv6/reassembly.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *      IPv6 fragment reassembly
   4 *      Linux INET6 implementation
   5 *
   6 *      Authors:
   7 *      Pedro Roque             <roque@di.fc.ul.pt>
   8 *
   9 *      Based on: net/ipv4/ip_fragment.c
  10 */
  11
  12/*
  13 *      Fixes:
  14 *      Andi Kleen      Make it work with multiple hosts.
  15 *                      More RFC compliance.
  16 *
  17 *      Horst von Brand Add missing #include <linux/string.h>
  18 *      Alexey Kuznetsov        SMP races, threading, cleanup.
  19 *      Patrick McHardy         LRU queue of frag heads for evictor.
  20 *      Mitsuru KANDA @USAGI    Register inet6_protocol{}.
  21 *      David Stevens and
  22 *      YOSHIFUJI,H. @USAGI     Always remove fragment header to
  23 *                              calculate ICV correctly.
  24 */
  25
  26#define pr_fmt(fmt) "IPv6: " fmt
  27
  28#include <linux/errno.h>
  29#include <linux/types.h>
  30#include <linux/string.h>
  31#include <linux/socket.h>
  32#include <linux/sockios.h>
  33#include <linux/jiffies.h>
  34#include <linux/net.h>
  35#include <linux/list.h>
  36#include <linux/netdevice.h>
  37#include <linux/in6.h>
  38#include <linux/ipv6.h>
  39#include <linux/icmpv6.h>
  40#include <linux/random.h>
  41#include <linux/jhash.h>
  42#include <linux/skbuff.h>
  43#include <linux/slab.h>
  44#include <linux/export.h>
  45#include <linux/tcp.h>
  46#include <linux/udp.h>
  47
  48#include <net/sock.h>
  49#include <net/snmp.h>
  50
  51#include <net/ipv6.h>
  52#include <net/ip6_route.h>
  53#include <net/protocol.h>
  54#include <net/transp_v6.h>
  55#include <net/rawv6.h>
  56#include <net/ndisc.h>
  57#include <net/addrconf.h>
  58#include <net/ipv6_frag.h>
  59#include <net/inet_ecn.h>
  60
  61static const char ip6_frag_cache_name[] = "ip6-frags";
  62
  63static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
  64{
  65        return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
  66}
  67
  68static struct inet_frags ip6_frags;
  69
  70static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
  71                          struct sk_buff *prev_tail, struct net_device *dev);
  72
  73static void ip6_frag_expire(struct timer_list *t)
  74{
  75        struct inet_frag_queue *frag = from_timer(frag, t, timer);
  76        struct frag_queue *fq;
  77
  78        fq = container_of(frag, struct frag_queue, q);
  79
  80        ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
  81}
  82
  83static struct frag_queue *
  84fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
  85{
  86        struct frag_v6_compare_key key = {
  87                .id = id,
  88                .saddr = hdr->saddr,
  89                .daddr = hdr->daddr,
  90                .user = IP6_DEFRAG_LOCAL_DELIVER,
  91                .iif = iif,
  92        };
  93        struct inet_frag_queue *q;
  94
  95        if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
  96                                            IPV6_ADDR_LINKLOCAL)))
  97                key.iif = 0;
  98
  99        q = inet_frag_find(net->ipv6.fqdir, &key);
 100        if (!q)
 101                return NULL;
 102
 103        return container_of(q, struct frag_queue, q);
 104}
 105
 106static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 107                          struct frag_hdr *fhdr, int nhoff,
 108                          u32 *prob_offset)
 109{
 110        struct net *net = dev_net(skb_dst(skb)->dev);
 111        int offset, end, fragsize;
 112        struct sk_buff *prev_tail;
 113        struct net_device *dev;
 114        int err = -ENOENT;
 115        u8 ecn;
 116
 117        if (fq->q.flags & INET_FRAG_COMPLETE)
 118                goto err;
 119
 120        err = -EINVAL;
 121        offset = ntohs(fhdr->frag_off) & ~0x7;
 122        end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
 123                        ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
 124
 125        if ((unsigned int)end > IPV6_MAXPLEN) {
 126                *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb);
 127                /* note that if prob_offset is set, the skb is freed elsewhere,
 128                 * we do not free it here.
 129                 */
 130                return -1;
 131        }
 132
 133        ecn = ip6_frag_ecn(ipv6_hdr(skb));
 134
 135        if (skb->ip_summed == CHECKSUM_COMPLETE) {
 136                const unsigned char *nh = skb_network_header(skb);
 137                skb->csum = csum_sub(skb->csum,
 138                                     csum_partial(nh, (u8 *)(fhdr + 1) - nh,
 139                                                  0));
 140        }
 141
 142        /* Is this the final fragment? */
 143        if (!(fhdr->frag_off & htons(IP6_MF))) {
 144                /* If we already have some bits beyond end
 145                 * or have different end, the segment is corrupted.
 146                 */
 147                if (end < fq->q.len ||
 148                    ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
 149                        goto discard_fq;
 150                fq->q.flags |= INET_FRAG_LAST_IN;
 151                fq->q.len = end;
 152        } else {
 153                /* Check if the fragment is rounded to 8 bytes.
 154                 * Required by the RFC.
 155                 */
 156                if (end & 0x7) {
 157                        /* RFC2460 says always send parameter problem in
 158                         * this case. -DaveM
 159                         */
 160                        *prob_offset = offsetof(struct ipv6hdr, payload_len);
 161                        return -1;
 162                }
 163                if (end > fq->q.len) {
 164                        /* Some bits beyond end -> corruption. */
 165                        if (fq->q.flags & INET_FRAG_LAST_IN)
 166                                goto discard_fq;
 167                        fq->q.len = end;
 168                }
 169        }
 170
 171        if (end == offset)
 172                goto discard_fq;
 173
 174        err = -ENOMEM;
 175        /* Point into the IP datagram 'data' part. */
 176        if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
 177                goto discard_fq;
 178
 179        err = pskb_trim_rcsum(skb, end - offset);
 180        if (err)
 181                goto discard_fq;
 182
 183        /* Note : skb->rbnode and skb->dev share the same location. */
 184        dev = skb->dev;
 185        /* Makes sure compiler wont do silly aliasing games */
 186        barrier();
 187
 188        prev_tail = fq->q.fragments_tail;
 189        err = inet_frag_queue_insert(&fq->q, skb, offset, end);
 190        if (err)
 191                goto insert_error;
 192
 193        if (dev)
 194                fq->iif = dev->ifindex;
 195
 196        fq->q.stamp = skb->tstamp;
 197        fq->q.meat += skb->len;
 198        fq->ecn |= ecn;
 199        add_frag_mem_limit(fq->q.fqdir, skb->truesize);
 200
 201        fragsize = -skb_network_offset(skb) + skb->len;
 202        if (fragsize > fq->q.max_size)
 203                fq->q.max_size = fragsize;
 204
 205        /* The first fragment.
 206         * nhoffset is obtained from the first fragment, of course.
 207         */
 208        if (offset == 0) {
 209                fq->nhoffset = nhoff;
 210                fq->q.flags |= INET_FRAG_FIRST_IN;
 211        }
 212
 213        if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
 214            fq->q.meat == fq->q.len) {
 215                unsigned long orefdst = skb->_skb_refdst;
 216
 217                skb->_skb_refdst = 0UL;
 218                err = ip6_frag_reasm(fq, skb, prev_tail, dev);
 219                skb->_skb_refdst = orefdst;
 220                return err;
 221        }
 222
 223        skb_dst_drop(skb);
 224        return -EINPROGRESS;
 225
 226insert_error:
 227        if (err == IPFRAG_DUP) {
 228                kfree_skb(skb);
 229                return -EINVAL;
 230        }
 231        err = -EINVAL;
 232        __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 233                        IPSTATS_MIB_REASM_OVERLAPS);
 234discard_fq:
 235        inet_frag_kill(&fq->q);
 236        __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 237                        IPSTATS_MIB_REASMFAILS);
 238err:
 239        kfree_skb(skb);
 240        return err;
 241}
 242
 243/*
 244 *      Check if this packet is complete.
 245 *
 246 *      It is called with locked fq, and caller must check that
 247 *      queue is eligible for reassembly i.e. it is not COMPLETE,
 248 *      the last and the first frames arrived and all the bits are here.
 249 */
 250static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
 251                          struct sk_buff *prev_tail, struct net_device *dev)
 252{
 253        struct net *net = fq->q.fqdir->net;
 254        unsigned int nhoff;
 255        void *reasm_data;
 256        int payload_len;
 257        u8 ecn;
 258
 259        inet_frag_kill(&fq->q);
 260
 261        ecn = ip_frag_ecn_table[fq->ecn];
 262        if (unlikely(ecn == 0xff))
 263                goto out_fail;
 264
 265        reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
 266        if (!reasm_data)
 267                goto out_oom;
 268
 269        payload_len = ((skb->data - skb_network_header(skb)) -
 270                       sizeof(struct ipv6hdr) + fq->q.len -
 271                       sizeof(struct frag_hdr));
 272        if (payload_len > IPV6_MAXPLEN)
 273                goto out_oversize;
 274
 275        /* We have to remove fragment header from datagram and to relocate
 276         * header in order to calculate ICV correctly. */
 277        nhoff = fq->nhoffset;
 278        skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0];
 279        memmove(skb->head + sizeof(struct frag_hdr), skb->head,
 280                (skb->data - skb->head) - sizeof(struct frag_hdr));
 281        if (skb_mac_header_was_set(skb))
 282                skb->mac_header += sizeof(struct frag_hdr);
 283        skb->network_header += sizeof(struct frag_hdr);
 284
 285        skb_reset_transport_header(skb);
 286
 287        inet_frag_reasm_finish(&fq->q, skb, reasm_data, true);
 288
 289        skb->dev = dev;
 290        ipv6_hdr(skb)->payload_len = htons(payload_len);
 291        ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
 292        IP6CB(skb)->nhoff = nhoff;
 293        IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
 294        IP6CB(skb)->frag_max_size = fq->q.max_size;
 295
 296        /* Yes, and fold redundant checksum back. 8) */
 297        skb_postpush_rcsum(skb, skb_network_header(skb),
 298                           skb_network_header_len(skb));
 299
 300        rcu_read_lock();
 301        __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS);
 302        rcu_read_unlock();
 303        fq->q.rb_fragments = RB_ROOT;
 304        fq->q.fragments_tail = NULL;
 305        fq->q.last_run_head = NULL;
 306        return 1;
 307
 308out_oversize:
 309        net_dbg_ratelimited("ip6_frag_reasm: payload len = %d\n", payload_len);
 310        goto out_fail;
 311out_oom:
 312        net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
 313out_fail:
 314        rcu_read_lock();
 315        __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS);
 316        rcu_read_unlock();
 317        inet_frag_kill(&fq->q);
 318        return -1;
 319}
 320
 321static int ipv6_frag_rcv(struct sk_buff *skb)
 322{
 323        struct frag_hdr *fhdr;
 324        struct frag_queue *fq;
 325        const struct ipv6hdr *hdr = ipv6_hdr(skb);
 326        struct net *net = dev_net(skb_dst(skb)->dev);
 327        u8 nexthdr;
 328        int iif;
 329
 330        if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
 331                goto fail_hdr;
 332
 333        __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
 334
 335        /* Jumbo payload inhibits frag. header */
 336        if (hdr->payload_len == 0)
 337                goto fail_hdr;
 338
 339        if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
 340                                 sizeof(struct frag_hdr))))
 341                goto fail_hdr;
 342
 343        hdr = ipv6_hdr(skb);
 344        fhdr = (struct frag_hdr *)skb_transport_header(skb);
 345
 346        if (!(fhdr->frag_off & htons(IP6_OFFSET | IP6_MF))) {
 347                /* It is not a fragmented frame */
 348                skb->transport_header += sizeof(struct frag_hdr);
 349                __IP6_INC_STATS(net,
 350                                ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
 351
 352                IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
 353                IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
 354                IP6CB(skb)->frag_max_size = ntohs(hdr->payload_len) +
 355                                            sizeof(struct ipv6hdr);
 356                return 1;
 357        }
 358
 359        /* RFC 8200, Section 4.5 Fragment Header:
 360         * If the first fragment does not include all headers through an
 361         * Upper-Layer header, then that fragment should be discarded and
 362         * an ICMP Parameter Problem, Code 3, message should be sent to
 363         * the source of the fragment, with the Pointer field set to zero.
 364         */
 365        nexthdr = hdr->nexthdr;
 366        if (ipv6frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) {
 367                __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
 368                                IPSTATS_MIB_INHDRERRORS);
 369                icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0);
 370                return -1;
 371        }
 372
 373        iif = skb->dev ? skb->dev->ifindex : 0;
 374        fq = fq_find(net, fhdr->identification, hdr, iif);
 375        if (fq) {
 376                u32 prob_offset = 0;
 377                int ret;
 378
 379                spin_lock(&fq->q.lock);
 380
 381                fq->iif = iif;
 382                ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff,
 383                                     &prob_offset);
 384
 385                spin_unlock(&fq->q.lock);
 386                inet_frag_put(&fq->q);
 387                if (prob_offset) {
 388                        __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
 389                                        IPSTATS_MIB_INHDRERRORS);
 390                        /* icmpv6_param_prob() calls kfree_skb(skb) */
 391                        icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
 392                }
 393                return ret;
 394        }
 395
 396        __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
 397        kfree_skb(skb);
 398        return -1;
 399
 400fail_hdr:
 401        __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
 402                        IPSTATS_MIB_INHDRERRORS);
 403        icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
 404        return -1;
 405}
 406
 407static const struct inet6_protocol frag_protocol = {
 408        .handler        =       ipv6_frag_rcv,
 409        .flags          =       INET6_PROTO_NOPOLICY,
 410};
 411
 412#ifdef CONFIG_SYSCTL
 413
 414static struct ctl_table ip6_frags_ns_ctl_table[] = {
 415        {
 416                .procname       = "ip6frag_high_thresh",
 417                .maxlen         = sizeof(unsigned long),
 418                .mode           = 0644,
 419                .proc_handler   = proc_doulongvec_minmax,
 420        },
 421        {
 422                .procname       = "ip6frag_low_thresh",
 423                .maxlen         = sizeof(unsigned long),
 424                .mode           = 0644,
 425                .proc_handler   = proc_doulongvec_minmax,
 426        },
 427        {
 428                .procname       = "ip6frag_time",
 429                .maxlen         = sizeof(int),
 430                .mode           = 0644,
 431                .proc_handler   = proc_dointvec_jiffies,
 432        },
 433        { }
 434};
 435
 436/* secret interval has been deprecated */
 437static int ip6_frags_secret_interval_unused;
 438static struct ctl_table ip6_frags_ctl_table[] = {
 439        {
 440                .procname       = "ip6frag_secret_interval",
 441                .data           = &ip6_frags_secret_interval_unused,
 442                .maxlen         = sizeof(int),
 443                .mode           = 0644,
 444                .proc_handler   = proc_dointvec_jiffies,
 445        },
 446        { }
 447};
 448
 449static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
 450{
 451        struct ctl_table *table;
 452        struct ctl_table_header *hdr;
 453
 454        table = ip6_frags_ns_ctl_table;
 455        if (!net_eq(net, &init_net)) {
 456                table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL);
 457                if (!table)
 458                        goto err_alloc;
 459
 460        }
 461        table[0].data   = &net->ipv6.fqdir->high_thresh;
 462        table[0].extra1 = &net->ipv6.fqdir->low_thresh;
 463        table[1].data   = &net->ipv6.fqdir->low_thresh;
 464        table[1].extra2 = &net->ipv6.fqdir->high_thresh;
 465        table[2].data   = &net->ipv6.fqdir->timeout;
 466
 467        hdr = register_net_sysctl(net, "net/ipv6", table);
 468        if (!hdr)
 469                goto err_reg;
 470
 471        net->ipv6.sysctl.frags_hdr = hdr;
 472        return 0;
 473
 474err_reg:
 475        if (!net_eq(net, &init_net))
 476                kfree(table);
 477err_alloc:
 478        return -ENOMEM;
 479}
 480
 481static void __net_exit ip6_frags_ns_sysctl_unregister(struct net *net)
 482{
 483        struct ctl_table *table;
 484
 485        table = net->ipv6.sysctl.frags_hdr->ctl_table_arg;
 486        unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
 487        if (!net_eq(net, &init_net))
 488                kfree(table);
 489}
 490
 491static struct ctl_table_header *ip6_ctl_header;
 492
 493static int ip6_frags_sysctl_register(void)
 494{
 495        ip6_ctl_header = register_net_sysctl(&init_net, "net/ipv6",
 496                        ip6_frags_ctl_table);
 497        return ip6_ctl_header == NULL ? -ENOMEM : 0;
 498}
 499
 500static void ip6_frags_sysctl_unregister(void)
 501{
 502        unregister_net_sysctl_table(ip6_ctl_header);
 503}
 504#else
 505static int ip6_frags_ns_sysctl_register(struct net *net)
 506{
 507        return 0;
 508}
 509
 510static void ip6_frags_ns_sysctl_unregister(struct net *net)
 511{
 512}
 513
 514static int ip6_frags_sysctl_register(void)
 515{
 516        return 0;
 517}
 518
 519static void ip6_frags_sysctl_unregister(void)
 520{
 521}
 522#endif
 523
 524static int __net_init ipv6_frags_init_net(struct net *net)
 525{
 526        int res;
 527
 528        res = fqdir_init(&net->ipv6.fqdir, &ip6_frags, net);
 529        if (res < 0)
 530                return res;
 531
 532        net->ipv6.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
 533        net->ipv6.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
 534        net->ipv6.fqdir->timeout = IPV6_FRAG_TIMEOUT;
 535
 536        res = ip6_frags_ns_sysctl_register(net);
 537        if (res < 0)
 538                fqdir_exit(net->ipv6.fqdir);
 539        return res;
 540}
 541
 542static void __net_exit ipv6_frags_pre_exit_net(struct net *net)
 543{
 544        fqdir_pre_exit(net->ipv6.fqdir);
 545}
 546
 547static void __net_exit ipv6_frags_exit_net(struct net *net)
 548{
 549        ip6_frags_ns_sysctl_unregister(net);
 550        fqdir_exit(net->ipv6.fqdir);
 551}
 552
 553static struct pernet_operations ip6_frags_ops = {
 554        .init           = ipv6_frags_init_net,
 555        .pre_exit       = ipv6_frags_pre_exit_net,
 556        .exit           = ipv6_frags_exit_net,
 557};
 558
 559static const struct rhashtable_params ip6_rhash_params = {
 560        .head_offset            = offsetof(struct inet_frag_queue, node),
 561        .hashfn                 = ip6frag_key_hashfn,
 562        .obj_hashfn             = ip6frag_obj_hashfn,
 563        .obj_cmpfn              = ip6frag_obj_cmpfn,
 564        .automatic_shrinking    = true,
 565};
 566
 567int __init ipv6_frag_init(void)
 568{
 569        int ret;
 570
 571        ip6_frags.constructor = ip6frag_init;
 572        ip6_frags.destructor = NULL;
 573        ip6_frags.qsize = sizeof(struct frag_queue);
 574        ip6_frags.frag_expire = ip6_frag_expire;
 575        ip6_frags.frags_cache_name = ip6_frag_cache_name;
 576        ip6_frags.rhash_params = ip6_rhash_params;
 577        ret = inet_frags_init(&ip6_frags);
 578        if (ret)
 579                goto out;
 580
 581        ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
 582        if (ret)
 583                goto err_protocol;
 584
 585        ret = ip6_frags_sysctl_register();
 586        if (ret)
 587                goto err_sysctl;
 588
 589        ret = register_pernet_subsys(&ip6_frags_ops);
 590        if (ret)
 591                goto err_pernet;
 592
 593out:
 594        return ret;
 595
 596err_pernet:
 597        ip6_frags_sysctl_unregister();
 598err_sysctl:
 599        inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
 600err_protocol:
 601        inet_frags_fini(&ip6_frags);
 602        goto out;
 603}
 604
 605void ipv6_frag_exit(void)
 606{
 607        ip6_frags_sysctl_unregister();
 608        unregister_pernet_subsys(&ip6_frags_ops);
 609        inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
 610        inet_frags_fini(&ip6_frags);
 611}
 612