linux/net/ipv6/reassembly.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *      IPv6 fragment reassembly
   4 *      Linux INET6 implementation
   5 *
   6 *      Authors:
   7 *      Pedro Roque             <roque@di.fc.ul.pt>
   8 *
   9 *      Based on: net/ipv4/ip_fragment.c
  10 */
  11
  12/*
  13 *      Fixes:
  14 *      Andi Kleen      Make it work with multiple hosts.
  15 *                      More RFC compliance.
  16 *
  17 *      Horst von Brand Add missing #include <linux/string.h>
  18 *      Alexey Kuznetsov        SMP races, threading, cleanup.
  19 *      Patrick McHardy         LRU queue of frag heads for evictor.
  20 *      Mitsuru KANDA @USAGI    Register inet6_protocol{}.
  21 *      David Stevens and
  22 *      YOSHIFUJI,H. @USAGI     Always remove fragment header to
  23 *                              calculate ICV correctly.
  24 */
  25
  26#define pr_fmt(fmt) "IPv6: " fmt
  27
  28#include <linux/errno.h>
  29#include <linux/types.h>
  30#include <linux/string.h>
  31#include <linux/socket.h>
  32#include <linux/sockios.h>
  33#include <linux/jiffies.h>
  34#include <linux/net.h>
  35#include <linux/list.h>
  36#include <linux/netdevice.h>
  37#include <linux/in6.h>
  38#include <linux/ipv6.h>
  39#include <linux/icmpv6.h>
  40#include <linux/random.h>
  41#include <linux/jhash.h>
  42#include <linux/skbuff.h>
  43#include <linux/slab.h>
  44#include <linux/export.h>
  45
  46#include <net/sock.h>
  47#include <net/snmp.h>
  48
  49#include <net/ipv6.h>
  50#include <net/ip6_route.h>
  51#include <net/protocol.h>
  52#include <net/transp_v6.h>
  53#include <net/rawv6.h>
  54#include <net/ndisc.h>
  55#include <net/addrconf.h>
  56#include <net/ipv6_frag.h>
  57#include <net/inet_ecn.h>
  58
  59static const char ip6_frag_cache_name[] = "ip6-frags";
  60
  61static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
  62{
  63        return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
  64}
  65
  66static struct inet_frags ip6_frags;
  67
  68static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
  69                          struct sk_buff *prev_tail, struct net_device *dev);
  70
  71static void ip6_frag_expire(struct timer_list *t)
  72{
  73        struct inet_frag_queue *frag = from_timer(frag, t, timer);
  74        struct frag_queue *fq;
  75        struct net *net;
  76
  77        fq = container_of(frag, struct frag_queue, q);
  78        net = container_of(fq->q.net, struct net, ipv6.frags);
  79
  80        ip6frag_expire_frag_queue(net, fq);
  81}
  82
  83static struct frag_queue *
  84fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
  85{
  86        struct frag_v6_compare_key key = {
  87                .id = id,
  88                .saddr = hdr->saddr,
  89                .daddr = hdr->daddr,
  90                .user = IP6_DEFRAG_LOCAL_DELIVER,
  91                .iif = iif,
  92        };
  93        struct inet_frag_queue *q;
  94
  95        if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
  96                                            IPV6_ADDR_LINKLOCAL)))
  97                key.iif = 0;
  98
  99        q = inet_frag_find(&net->ipv6.frags, &key);
 100        if (!q)
 101                return NULL;
 102
 103        return container_of(q, struct frag_queue, q);
 104}
 105
 106static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 107                          struct frag_hdr *fhdr, int nhoff,
 108                          u32 *prob_offset)
 109{
 110        struct net *net = dev_net(skb_dst(skb)->dev);
 111        int offset, end, fragsize;
 112        struct sk_buff *prev_tail;
 113        struct net_device *dev;
 114        int err = -ENOENT;
 115        u8 ecn;
 116
 117        if (fq->q.flags & INET_FRAG_COMPLETE)
 118                goto err;
 119
 120        err = -EINVAL;
 121        offset = ntohs(fhdr->frag_off) & ~0x7;
 122        end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
 123                        ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
 124
 125        if ((unsigned int)end > IPV6_MAXPLEN) {
 126                *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb);
 127                /* note that if prob_offset is set, the skb is freed elsewhere,
 128                 * we do not free it here.
 129                 */
 130                return -1;
 131        }
 132
 133        ecn = ip6_frag_ecn(ipv6_hdr(skb));
 134
 135        if (skb->ip_summed == CHECKSUM_COMPLETE) {
 136                const unsigned char *nh = skb_network_header(skb);
 137                skb->csum = csum_sub(skb->csum,
 138                                     csum_partial(nh, (u8 *)(fhdr + 1) - nh,
 139                                                  0));
 140        }
 141
 142        /* Is this the final fragment? */
 143        if (!(fhdr->frag_off & htons(IP6_MF))) {
 144                /* If we already have some bits beyond end
 145                 * or have different end, the segment is corrupted.
 146                 */
 147                if (end < fq->q.len ||
 148                    ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
 149                        goto discard_fq;
 150                fq->q.flags |= INET_FRAG_LAST_IN;
 151                fq->q.len = end;
 152        } else {
 153                /* Check if the fragment is rounded to 8 bytes.
 154                 * Required by the RFC.
 155                 */
 156                if (end & 0x7) {
 157                        /* RFC2460 says always send parameter problem in
 158                         * this case. -DaveM
 159                         */
 160                        *prob_offset = offsetof(struct ipv6hdr, payload_len);
 161                        return -1;
 162                }
 163                if (end > fq->q.len) {
 164                        /* Some bits beyond end -> corruption. */
 165                        if (fq->q.flags & INET_FRAG_LAST_IN)
 166                                goto discard_fq;
 167                        fq->q.len = end;
 168                }
 169        }
 170
 171        if (end == offset)
 172                goto discard_fq;
 173
 174        err = -ENOMEM;
 175        /* Point into the IP datagram 'data' part. */
 176        if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
 177                goto discard_fq;
 178
 179        err = pskb_trim_rcsum(skb, end - offset);
 180        if (err)
 181                goto discard_fq;
 182
 183        /* Note : skb->rbnode and skb->dev share the same location. */
 184        dev = skb->dev;
 185        /* Makes sure compiler wont do silly aliasing games */
 186        barrier();
 187
 188        prev_tail = fq->q.fragments_tail;
 189        err = inet_frag_queue_insert(&fq->q, skb, offset, end);
 190        if (err)
 191                goto insert_error;
 192
 193        if (dev)
 194                fq->iif = dev->ifindex;
 195
 196        fq->q.stamp = skb->tstamp;
 197        fq->q.meat += skb->len;
 198        fq->ecn |= ecn;
 199        add_frag_mem_limit(fq->q.net, skb->truesize);
 200
 201        fragsize = -skb_network_offset(skb) + skb->len;
 202        if (fragsize > fq->q.max_size)
 203                fq->q.max_size = fragsize;
 204
 205        /* The first fragment.
 206         * nhoffset is obtained from the first fragment, of course.
 207         */
 208        if (offset == 0) {
 209                fq->nhoffset = nhoff;
 210                fq->q.flags |= INET_FRAG_FIRST_IN;
 211        }
 212
 213        if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
 214            fq->q.meat == fq->q.len) {
 215                unsigned long orefdst = skb->_skb_refdst;
 216
 217                skb->_skb_refdst = 0UL;
 218                err = ip6_frag_reasm(fq, skb, prev_tail, dev);
 219                skb->_skb_refdst = orefdst;
 220                return err;
 221        }
 222
 223        skb_dst_drop(skb);
 224        return -EINPROGRESS;
 225
 226insert_error:
 227        if (err == IPFRAG_DUP) {
 228                kfree_skb(skb);
 229                return -EINVAL;
 230        }
 231        err = -EINVAL;
 232        __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 233                        IPSTATS_MIB_REASM_OVERLAPS);
 234discard_fq:
 235        inet_frag_kill(&fq->q);
 236        __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 237                        IPSTATS_MIB_REASMFAILS);
 238err:
 239        kfree_skb(skb);
 240        return err;
 241}
 242
 243/*
 244 *      Check if this packet is complete.
 245 *
 246 *      It is called with locked fq, and caller must check that
 247 *      queue is eligible for reassembly i.e. it is not COMPLETE,
 248 *      the last and the first frames arrived and all the bits are here.
 249 */
 250static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
 251                          struct sk_buff *prev_tail, struct net_device *dev)
 252{
 253        struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
 254        unsigned int nhoff;
 255        void *reasm_data;
 256        int payload_len;
 257        u8 ecn;
 258
 259        inet_frag_kill(&fq->q);
 260
 261        ecn = ip_frag_ecn_table[fq->ecn];
 262        if (unlikely(ecn == 0xff))
 263                goto out_fail;
 264
 265        reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
 266        if (!reasm_data)
 267                goto out_oom;
 268
 269        payload_len = ((skb->data - skb_network_header(skb)) -
 270                       sizeof(struct ipv6hdr) + fq->q.len -
 271                       sizeof(struct frag_hdr));
 272        if (payload_len > IPV6_MAXPLEN)
 273                goto out_oversize;
 274
 275        /* We have to remove fragment header from datagram and to relocate
 276         * header in order to calculate ICV correctly. */
 277        nhoff = fq->nhoffset;
 278        skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0];
 279        memmove(skb->head + sizeof(struct frag_hdr), skb->head,
 280                (skb->data - skb->head) - sizeof(struct frag_hdr));
 281        if (skb_mac_header_was_set(skb))
 282                skb->mac_header += sizeof(struct frag_hdr);
 283        skb->network_header += sizeof(struct frag_hdr);
 284
 285        skb_reset_transport_header(skb);
 286
 287        inet_frag_reasm_finish(&fq->q, skb, reasm_data);
 288
 289        skb->dev = dev;
 290        ipv6_hdr(skb)->payload_len = htons(payload_len);
 291        ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
 292        IP6CB(skb)->nhoff = nhoff;
 293        IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
 294        IP6CB(skb)->frag_max_size = fq->q.max_size;
 295
 296        /* Yes, and fold redundant checksum back. 8) */
 297        skb_postpush_rcsum(skb, skb_network_header(skb),
 298                           skb_network_header_len(skb));
 299
 300        rcu_read_lock();
 301        __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS);
 302        rcu_read_unlock();
 303        fq->q.rb_fragments = RB_ROOT;
 304        fq->q.fragments_tail = NULL;
 305        fq->q.last_run_head = NULL;
 306        return 1;
 307
 308out_oversize:
 309        net_dbg_ratelimited("ip6_frag_reasm: payload len = %d\n", payload_len);
 310        goto out_fail;
 311out_oom:
 312        net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
 313out_fail:
 314        rcu_read_lock();
 315        __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS);
 316        rcu_read_unlock();
 317        inet_frag_kill(&fq->q);
 318        return -1;
 319}
 320
 321static int ipv6_frag_rcv(struct sk_buff *skb)
 322{
 323        struct frag_hdr *fhdr;
 324        struct frag_queue *fq;
 325        const struct ipv6hdr *hdr = ipv6_hdr(skb);
 326        struct net *net = dev_net(skb_dst(skb)->dev);
 327        int iif;
 328
 329        if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
 330                goto fail_hdr;
 331
 332        __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
 333
 334        /* Jumbo payload inhibits frag. header */
 335        if (hdr->payload_len == 0)
 336                goto fail_hdr;
 337
 338        if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
 339                                 sizeof(struct frag_hdr))))
 340                goto fail_hdr;
 341
 342        hdr = ipv6_hdr(skb);
 343        fhdr = (struct frag_hdr *)skb_transport_header(skb);
 344
 345        if (!(fhdr->frag_off & htons(0xFFF9))) {
 346                /* It is not a fragmented frame */
 347                skb->transport_header += sizeof(struct frag_hdr);
 348                __IP6_INC_STATS(net,
 349                                ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
 350
 351                IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
 352                IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
 353                return 1;
 354        }
 355
 356        iif = skb->dev ? skb->dev->ifindex : 0;
 357        fq = fq_find(net, fhdr->identification, hdr, iif);
 358        if (fq) {
 359                u32 prob_offset = 0;
 360                int ret;
 361
 362                spin_lock(&fq->q.lock);
 363
 364                fq->iif = iif;
 365                ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff,
 366                                     &prob_offset);
 367
 368                spin_unlock(&fq->q.lock);
 369                inet_frag_put(&fq->q);
 370                if (prob_offset) {
 371                        __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
 372                                        IPSTATS_MIB_INHDRERRORS);
 373                        /* icmpv6_param_prob() calls kfree_skb(skb) */
 374                        icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
 375                }
 376                return ret;
 377        }
 378
 379        __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
 380        kfree_skb(skb);
 381        return -1;
 382
 383fail_hdr:
 384        __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
 385                        IPSTATS_MIB_INHDRERRORS);
 386        icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
 387        return -1;
 388}
 389
 390static const struct inet6_protocol frag_protocol = {
 391        .handler        =       ipv6_frag_rcv,
 392        .flags          =       INET6_PROTO_NOPOLICY,
 393};
 394
 395#ifdef CONFIG_SYSCTL
 396
 397static struct ctl_table ip6_frags_ns_ctl_table[] = {
 398        {
 399                .procname       = "ip6frag_high_thresh",
 400                .data           = &init_net.ipv6.frags.high_thresh,
 401                .maxlen         = sizeof(unsigned long),
 402                .mode           = 0644,
 403                .proc_handler   = proc_doulongvec_minmax,
 404                .extra1         = &init_net.ipv6.frags.low_thresh
 405        },
 406        {
 407                .procname       = "ip6frag_low_thresh",
 408                .data           = &init_net.ipv6.frags.low_thresh,
 409                .maxlen         = sizeof(unsigned long),
 410                .mode           = 0644,
 411                .proc_handler   = proc_doulongvec_minmax,
 412                .extra2         = &init_net.ipv6.frags.high_thresh
 413        },
 414        {
 415                .procname       = "ip6frag_time",
 416                .data           = &init_net.ipv6.frags.timeout,
 417                .maxlen         = sizeof(int),
 418                .mode           = 0644,
 419                .proc_handler   = proc_dointvec_jiffies,
 420        },
 421        { }
 422};
 423
 424/* secret interval has been deprecated */
 425static int ip6_frags_secret_interval_unused;
 426static struct ctl_table ip6_frags_ctl_table[] = {
 427        {
 428                .procname       = "ip6frag_secret_interval",
 429                .data           = &ip6_frags_secret_interval_unused,
 430                .maxlen         = sizeof(int),
 431                .mode           = 0644,
 432                .proc_handler   = proc_dointvec_jiffies,
 433        },
 434        { }
 435};
 436
 437static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
 438{
 439        struct ctl_table *table;
 440        struct ctl_table_header *hdr;
 441
 442        table = ip6_frags_ns_ctl_table;
 443        if (!net_eq(net, &init_net)) {
 444                table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL);
 445                if (!table)
 446                        goto err_alloc;
 447
 448                table[0].data = &net->ipv6.frags.high_thresh;
 449                table[0].extra1 = &net->ipv6.frags.low_thresh;
 450                table[1].data = &net->ipv6.frags.low_thresh;
 451                table[1].extra2 = &net->ipv6.frags.high_thresh;
 452                table[2].data = &net->ipv6.frags.timeout;
 453        }
 454
 455        hdr = register_net_sysctl(net, "net/ipv6", table);
 456        if (!hdr)
 457                goto err_reg;
 458
 459        net->ipv6.sysctl.frags_hdr = hdr;
 460        return 0;
 461
 462err_reg:
 463        if (!net_eq(net, &init_net))
 464                kfree(table);
 465err_alloc:
 466        return -ENOMEM;
 467}
 468
 469static void __net_exit ip6_frags_ns_sysctl_unregister(struct net *net)
 470{
 471        struct ctl_table *table;
 472
 473        table = net->ipv6.sysctl.frags_hdr->ctl_table_arg;
 474        unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
 475        if (!net_eq(net, &init_net))
 476                kfree(table);
 477}
 478
 479static struct ctl_table_header *ip6_ctl_header;
 480
 481static int ip6_frags_sysctl_register(void)
 482{
 483        ip6_ctl_header = register_net_sysctl(&init_net, "net/ipv6",
 484                        ip6_frags_ctl_table);
 485        return ip6_ctl_header == NULL ? -ENOMEM : 0;
 486}
 487
 488static void ip6_frags_sysctl_unregister(void)
 489{
 490        unregister_net_sysctl_table(ip6_ctl_header);
 491}
 492#else
 493static int ip6_frags_ns_sysctl_register(struct net *net)
 494{
 495        return 0;
 496}
 497
 498static void ip6_frags_ns_sysctl_unregister(struct net *net)
 499{
 500}
 501
 502static int ip6_frags_sysctl_register(void)
 503{
 504        return 0;
 505}
 506
 507static void ip6_frags_sysctl_unregister(void)
 508{
 509}
 510#endif
 511
 512static int __net_init ipv6_frags_init_net(struct net *net)
 513{
 514        int res;
 515
 516        net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
 517        net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
 518        net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
 519        net->ipv6.frags.f = &ip6_frags;
 520
 521        res = inet_frags_init_net(&net->ipv6.frags);
 522        if (res < 0)
 523                return res;
 524
 525        res = ip6_frags_ns_sysctl_register(net);
 526        if (res < 0)
 527                inet_frags_exit_net(&net->ipv6.frags);
 528        return res;
 529}
 530
 531static void __net_exit ipv6_frags_exit_net(struct net *net)
 532{
 533        ip6_frags_ns_sysctl_unregister(net);
 534        inet_frags_exit_net(&net->ipv6.frags);
 535}
 536
 537static struct pernet_operations ip6_frags_ops = {
 538        .init = ipv6_frags_init_net,
 539        .exit = ipv6_frags_exit_net,
 540};
 541
 542static const struct rhashtable_params ip6_rhash_params = {
 543        .head_offset            = offsetof(struct inet_frag_queue, node),
 544        .hashfn                 = ip6frag_key_hashfn,
 545        .obj_hashfn             = ip6frag_obj_hashfn,
 546        .obj_cmpfn              = ip6frag_obj_cmpfn,
 547        .automatic_shrinking    = true,
 548};
 549
 550int __init ipv6_frag_init(void)
 551{
 552        int ret;
 553
 554        ip6_frags.constructor = ip6frag_init;
 555        ip6_frags.destructor = NULL;
 556        ip6_frags.qsize = sizeof(struct frag_queue);
 557        ip6_frags.frag_expire = ip6_frag_expire;
 558        ip6_frags.frags_cache_name = ip6_frag_cache_name;
 559        ip6_frags.rhash_params = ip6_rhash_params;
 560        ret = inet_frags_init(&ip6_frags);
 561        if (ret)
 562                goto out;
 563
 564        ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
 565        if (ret)
 566                goto err_protocol;
 567
 568        ret = ip6_frags_sysctl_register();
 569        if (ret)
 570                goto err_sysctl;
 571
 572        ret = register_pernet_subsys(&ip6_frags_ops);
 573        if (ret)
 574                goto err_pernet;
 575
 576out:
 577        return ret;
 578
 579err_pernet:
 580        ip6_frags_sysctl_unregister();
 581err_sysctl:
 582        inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
 583err_protocol:
 584        inet_frags_fini(&ip6_frags);
 585        goto out;
 586}
 587
 588void ipv6_frag_exit(void)
 589{
 590        inet_frags_fini(&ip6_frags);
 591        ip6_frags_sysctl_unregister();
 592        unregister_pernet_subsys(&ip6_frags_ops);
 593        inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
 594}
 595