linux/net/ipv4/udp_offload.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPV4 GSO/GRO offload support
 *	Linux INET implementation
 *
 *	UDPv4 GSO support
 */

#include <linux/skbuff.h>
#include <net/udp.h>
#include <net/protocol.h>
#include <net/inet_common.h>

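/* Segment a UDP tunnel (encapsulated) GSO skb: strip the outer headers,
 * segment the inner packet via @gso_inner_segment, then restore the outer
 * headers on each segment and fix up the outer UDP length and checksum.
 */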
static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
	netdev_features_t features,
	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
					     netdev_features_t features),
	__be16 new_protocol, bool is_ipv6)
{
	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
	bool remcsum, need_csum, offload_csum, gso_partial;
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	struct udphdr *uh = udp_hdr(skb);
	u16 mac_offset = skb->mac_header;
	__be16 protocol = skb->protocol;
	u16 mac_len = skb->mac_len;
	int udp_offset, outer_hlen;
	__wsum partial;
	bool need_ipsec;

	if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
		goto out;

	/* Adjust partial header checksum to negate old length.
	 * We cannot rely on the value contained in uh->len as it is
	 * possible that the actual value exceeds the boundaries of the
	 * 16 bit length field due to the header being added outside of an
	 * IP or IPv6 frame that was already limited to 64K - 1.
	 */
	if (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL)
		partial = (__force __wsum)uh->len;
	else
		partial = (__force __wsum)htonl(skb->len);
	partial = csum_sub(csum_unfold(uh->check), partial);
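	/* "partial" now holds the outer checksum with the old length backed
	 * out; each segment's new length is added back in below when the
	 * per-segment outer UDP checksum is written.
	 */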

	/* setup inner skb. */
	skb->encapsulation = 0;
	SKB_GSO_CB(skb)->encap_level = 0;
	__skb_pull(skb, tnl_hlen);
	skb_reset_mac_header(skb);
	skb_set_network_header(skb, skb_inner_network_offset(skb));
	skb->mac_len = skb_inner_network_offset(skb);
	skb->protocol = new_protocol;

	need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
	skb->encap_hdr_csum = need_csum;

	remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
	skb->remcsum_offload = remcsum;

	need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb));
	/* Try to offload checksum if possible */
	offload_csum = !!(need_csum &&
			  !need_ipsec &&
			  (skb->dev->features &
			   (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) :
				      (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM))));

	features &= skb->dev->hw_enc_features;

	/* The only checksum offload we care about from here on out is the
	 * outer one so strip the existing checksum feature flags and
	 * instead set the flag based on our outer checksum offload value.
	 */
	if (remcsum) {
		features &= ~NETIF_F_CSUM_MASK;
		if (!need_csum || offload_csum)
			features |= NETIF_F_HW_CSUM;
	}

	/* segment inner packet. */
	segs = gso_inner_segment(skb, features);
	if (IS_ERR_OR_NULL(segs)) {
		skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
				     mac_len);
		goto out;
	}

	gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL);

	outer_hlen = skb_tnl_header_len(skb);
	udp_offset = outer_hlen - tnl_hlen;
	skb = segs;
	do {
		unsigned int len;

		if (remcsum)
			skb->ip_summed = CHECKSUM_NONE;

		/* Set up inner headers if we are offloading inner checksum */
		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			skb_reset_inner_headers(skb);
			skb->encapsulation = 1;
		}

		skb->mac_len = mac_len;
		skb->protocol = protocol;

		__skb_push(skb, outer_hlen);
		skb_reset_mac_header(skb);
		skb_set_network_header(skb, mac_len);
		skb_set_transport_header(skb, udp_offset);
		len = skb->len - udp_offset;
		uh = udp_hdr(skb);

		/* If we are only performing partial GSO the inner header
		 * will be using a length value equal to only one MSS sized
		 * segment instead of the entire frame.
		 */
		if (gso_partial && skb_is_gso(skb)) {
			uh->len = htons(skb_shinfo(skb)->gso_size +
					SKB_GSO_CB(skb)->data_offset +
					skb->head - (unsigned char *)uh);
		} else {
			uh->len = htons(len);
		}

		if (!need_csum)
			continue;

		uh->check = ~csum_fold(csum_add(partial,
				       (__force __wsum)htonl(len)));

		if (skb->encapsulation || !offload_csum) {
			uh->check = gso_make_checksum(skb, ~uh->check);
			if (uh->check == 0)
				uh->check = CSUM_MANGLED_0;
		} else {
			skb->ip_summed = CHECKSUM_PARTIAL;
			skb->csum_start = skb_transport_header(skb) - skb->head;
			skb->csum_offset = offsetof(struct udphdr, check);
		}
	} while ((skb = skb->next));
out:
	return segs;
}

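/* Pick the inner segmentation callback based on skb->inner_protocol_type
 * (an Ethernet frame or a raw IP protocol) and hand off to
 * __skb_udp_tunnel_segment().  Tunnel drivers (e.g. vxlan, geneve) are
 * expected to have set the inner protocol fields before this runs.
 */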
struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
				       netdev_features_t features,
				       bool is_ipv6)
{
	__be16 protocol = skb->protocol;
	const struct net_offload **offloads;
	const struct net_offload *ops;
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
					     netdev_features_t features);

	rcu_read_lock();

	switch (skb->inner_protocol_type) {
	case ENCAP_TYPE_ETHER:
		protocol = skb->inner_protocol;
		gso_inner_segment = skb_mac_gso_segment;
		break;
	case ENCAP_TYPE_IPPROTO:
		offloads = is_ipv6 ? inet6_offloads : inet_offloads;
		ops = rcu_dereference(offloads[skb->inner_ipproto]);
		if (!ops || !ops->callbacks.gso_segment)
			goto out_unlock;
		gso_inner_segment = ops->callbacks.gso_segment;
		break;
	default:
		goto out_unlock;
	}

	segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
					protocol, is_ipv6);

out_unlock:
	rcu_read_unlock();

	return segs;
}
EXPORT_SYMBOL(skb_udp_tunnel_segment);

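/* Segment a SKB_GSO_UDP_L4 skb into mss-sized datagrams that share the
 * original UDP header, updating each segment's length and checksum
 * incrementally instead of recomputing them from the payload.
 */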
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
				  netdev_features_t features)
{
	struct sock *sk = gso_skb->sk;
	unsigned int sum_truesize = 0;
	struct sk_buff *segs, *seg;
	struct udphdr *uh;
	unsigned int mss;
	bool copy_dtor;
	__sum16 check;
	__be16 newlen;

	mss = skb_shinfo(gso_skb)->gso_size;
	if (gso_skb->len <= sizeof(*uh) + mss)
		return ERR_PTR(-EINVAL);

	skb_pull(gso_skb, sizeof(*uh));

	/* clear destructor to avoid skb_segment assigning it to tail */
	copy_dtor = gso_skb->destructor == sock_wfree;
	if (copy_dtor)
		gso_skb->destructor = NULL;

	segs = skb_segment(gso_skb, features);
	if (IS_ERR_OR_NULL(segs)) {
		if (copy_dtor)
			gso_skb->destructor = sock_wfree;
		return segs;
	}

	/* GSO partial and frag_list segmentation only requires splitting
	 * the frame into an MSS multiple and possibly a remainder, both
	 * cases return a GSO skb. So update the mss now.
	 */
	if (skb_is_gso(segs))
		mss *= skb_shinfo(segs)->gso_segs;

	seg = segs;
	uh = udp_hdr(seg);

	/* preserve TX timestamp flags and TS key for first segment */
	skb_shinfo(seg)->tskey = skb_shinfo(gso_skb)->tskey;
	skb_shinfo(seg)->tx_flags |=
			(skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP);

	/* Compute the checksum adjustment from the old length to the new
	 * one (an incremental update, as in RFC 1624).
	 */
	newlen = htons(sizeof(*uh) + mss);
	check = csum16_add(csum16_sub(uh->check, uh->len), newlen);

	for (;;) {
		if (copy_dtor) {
			seg->destructor = sock_wfree;
			seg->sk = sk;
			sum_truesize += seg->truesize;
		}

		if (!seg->next)
			break;

		uh->len = newlen;
		uh->check = check;

		if (seg->ip_summed == CHECKSUM_PARTIAL)
			gso_reset_checksum(seg, ~check);
		else
			uh->check = gso_make_checksum(seg, ~check) ? :
				    CSUM_MANGLED_0;

		seg = seg->next;
		uh = udp_hdr(seg);
	}

	/* last packet can be partial gso_size, account for that in checksum */
	newlen = htons(skb_tail_pointer(seg) - skb_transport_header(seg) +
		       seg->data_len);
	check = csum16_add(csum16_sub(uh->check, uh->len), newlen);

	uh->len = newlen;
	uh->check = check;

	if (seg->ip_summed == CHECKSUM_PARTIAL)
		gso_reset_checksum(seg, ~check);
	else
		uh->check = gso_make_checksum(seg, ~check) ? : CSUM_MANGLED_0;

	/* update refcount for the packet */
	if (copy_dtor) {
		int delta = sum_truesize - gso_skb->truesize;

		/* In some pathological cases, delta can be negative.
		 * We need to either use refcount_add() or refcount_sub_and_test()
		 */
		if (likely(delta >= 0))
			refcount_add(delta, &sk->sk_wmem_alloc);
		else
			WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc));
	}
	return segs;
}
EXPORT_SYMBOL_GPL(__udp_gso_segment);

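/* GSO entry point for IPv4 UDP: dispatch tunnel segmentation, GSO_UDP_L4
 * segmentation, or legacy UFO, where the checksum must be completed in
 * software because hardware cannot checksum a datagram that will be sent
 * as multiple IP fragments.
 */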
static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
					 netdev_features_t features)
{
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	unsigned int mss;
	__wsum csum;
	struct udphdr *uh;
	struct iphdr *iph;

	if (skb->encapsulation &&
	    (skb_shinfo(skb)->gso_type &
	     (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
		segs = skb_udp_tunnel_segment(skb, features, false);
		goto out;
	}

	if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_UDP | SKB_GSO_UDP_L4)))
		goto out;

	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
		goto out;

	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
		return __udp_gso_segment(skb, features);

	mss = skb_shinfo(skb)->gso_size;
	if (unlikely(skb->len <= mss))
		goto out;

	/* Do software UFO. Complete and fill in the UDP checksum as
	 * HW cannot do checksum of UDP packets sent as multiple
	 * IP fragments.
	 */

	uh = udp_hdr(skb);
	iph = ip_hdr(skb);

	uh->check = 0;
	csum = skb_checksum(skb, 0, skb->len, 0);
	uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;

	skb->ip_summed = CHECKSUM_UNNECESSARY;

	/* If there is no outer header we can fake a checksum offload
	 * due to the fact that we have already done the checksum in
	 * software prior to segmenting the frame.
	 */
	if (!skb->encap_hdr_csum)
		features |= NETIF_F_HW_CSUM;

	/* Fragment the skb. IP headers of the fragments are updated in
	 * inet_gso_segment()
	 */
	segs = skb_segment(skb, features);
out:
	return segs;
}

#define UDP_GRO_CNT_MAX 64
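/* Coalesce plain (non-tunnel) UDP datagrams for a GRO-enabled socket.
 * Packets must match the flow's ports and the first datagram's length;
 * a shorter trailing datagram (or hitting UDP_GRO_CNT_MAX) completes
 * the batch, which can later be re-segmented as SKB_GSO_UDP_L4.
 */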
static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
					       struct sk_buff *skb)
{
	struct udphdr *uh = udp_hdr(skb);
	struct sk_buff *pp = NULL;
	struct udphdr *uh2;
	struct sk_buff *p;
	unsigned int ulen;

	/* requires a non-zero csum, for symmetry with GSO */
	if (!uh->check) {
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	/* Do not deal with padded or malicious packets, sorry! */
	ulen = ntohs(uh->len);
	if (ulen <= sizeof(*uh) || ulen != skb_gro_len(skb)) {
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}
	/* pull encapsulating udp header */
	skb_gro_pull(skb, sizeof(struct udphdr));
	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));

	list_for_each_entry(p, head, list) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		uh2 = udp_hdr(p);

		/* Match ports only, as csum is always non-zero */
		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}

		/* Terminate the flow on a len mismatch or if it grows
		 * "too much". Under a small-packet flood the GRO count
		 * could otherwise grow a lot, leading to excessive
		 * truesize values. On a len mismatch, merge the first
		 * packet shorter than gso_size; otherwise complete the
		 * GRO packet.
		 */
		if (ulen > ntohs(uh2->len) || skb_gro_receive(p, skb) ||
		    ulen != ntohs(uh2->len) ||
		    NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
			pp = p;

		return pp;
	}

	/* mismatch, but we never need to flush */
	return NULL;
}

INDIRECT_CALLABLE_DECLARE(struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
						   __be16 sport, __be16 dport));
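/* Generic UDP GRO receive: look up the destination socket; if it has
 * plain UDP GRO enabled, coalesce datagrams directly, otherwise fall
 * back to the socket's tunnel gro_receive callback once the outer
 * checksum (if any) has been validated.
 */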
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
				struct udphdr *uh, udp_lookup_t lookup)
{
	struct sk_buff *pp = NULL;
	struct sk_buff *p;
	struct udphdr *uh2;
	unsigned int off = skb_gro_offset(skb);
	int flush = 1;
	struct sock *sk;

	rcu_read_lock();
	sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
				udp4_lib_lookup_skb, skb, uh->source, uh->dest);
	if (!sk)
		goto out_unlock;

	if (udp_sk(sk)->gro_enabled) {
		pp = call_gro_receive(udp_gro_receive_segment, head, skb);
		rcu_read_unlock();
		return pp;
	}

	if (NAPI_GRO_CB(skb)->encap_mark ||
	    (skb->ip_summed != CHECKSUM_PARTIAL &&
	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
	     !NAPI_GRO_CB(skb)->csum_valid) ||
	    !udp_sk(sk)->gro_receive)
		goto out_unlock;

	/* mark that this skb passed once through the tunnel gro layer */
	NAPI_GRO_CB(skb)->encap_mark = 1;

	flush = 0;

	list_for_each_entry(p, head, list) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		uh2 = (struct udphdr *)(p->data + off);

		/* Match ports, and require that the checksums are either
		 * both zero or both nonzero.
		 */
		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source) ||
		    (!uh->check ^ !uh2->check)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
	pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb);

out_unlock:
	rcu_read_unlock();
	skb_gro_flush_final(skb, pp, flush);
	return pp;
}
EXPORT_SYMBOL(udp_gro_receive);

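/* IPv4 flavour: validate the UDP checksum against the IPv4 pseudo-header
 * (a zero checksum is acceptable here) before handing off to
 * udp_gro_receive().
 */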
INDIRECT_CALLABLE_SCOPE
struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
	struct udphdr *uh = udp_gro_udphdr(skb);

	if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
		goto flush;

	/* Don't bother verifying checksum if we're going to flush anyway. */
	if (NAPI_GRO_CB(skb)->flush)
		goto skip;

	if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
						 inet_gro_compute_pseudo))
		goto flush;
	else if (uh->check)
		skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
					     inet_gro_compute_pseudo);
skip:
	NAPI_GRO_CB(skb)->is_ipv6 = 0;
	return udp_gro_receive(head, skb, uh, udp4_lib_lookup_skb);

flush:
	NAPI_GRO_CB(skb)->flush = 1;
	return NULL;
}

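/* Mark a coalesced GRO packet as SKB_GSO_UDP_L4 with CHECKSUM_PARTIAL so
 * it can be re-segmented by __udp_gso_segment() on output.
 */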
static int udp_gro_complete_segment(struct sk_buff *skb)
{
	struct udphdr *uh = udp_hdr(skb);

	skb->csum_start = (unsigned char *)uh - skb->head;
	skb->csum_offset = offsetof(struct udphdr, check);
	skb->ip_summed = CHECKSUM_PARTIAL;

	skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
	skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
	return 0;
}

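/* Finish GRO for UDP: write the coalesced UDP length, then either mark
 * the packet for GSO_UDP_L4 re-segmentation or invoke the tunnel
 * socket's gro_complete callback with encapsulation set up.
 */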
int udp_gro_complete(struct sk_buff *skb, int nhoff,
		     udp_lookup_t lookup)
{
	__be16 newlen = htons(skb->len - nhoff);
	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
	int err = -ENOSYS;
	struct sock *sk;

	uh->len = newlen;

	rcu_read_lock();
	sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
				udp4_lib_lookup_skb, skb, uh->source, uh->dest);
	if (sk && udp_sk(sk)->gro_enabled) {
		err = udp_gro_complete_segment(skb);
	} else if (sk && udp_sk(sk)->gro_complete) {
		skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
					: SKB_GSO_UDP_TUNNEL;

		/* Set encapsulation before calling into inner gro_complete()
		 * functions to make them set up the inner offsets.
		 */
		skb->encapsulation = 1;
		err = udp_sk(sk)->gro_complete(sk, skb,
				nhoff + sizeof(struct udphdr));
	}
	rcu_read_unlock();

	if (skb->remcsum_offload)
		skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM;

	return err;
}
EXPORT_SYMBOL(udp_gro_complete);

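/* IPv4 completion: refresh the pseudo-header contribution in uh->check
 * for the new coalesced length, then run the generic completion.
 */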
INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);

	if (uh->check)
		uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
					  iph->daddr, 0);

	return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
}

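/* Offload callbacks for IPPROTO_UDP, registered at boot from the IPv4
 * offload init path via udpv4_offload_init() below.
 */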
static const struct net_offload udpv4_offload = {
	.callbacks = {
		.gso_segment = udp4_ufo_fragment,
		.gro_receive = udp4_gro_receive,
		.gro_complete = udp4_gro_complete,
	},
};

int __init udpv4_offload_init(void)
{
	return inet_add_offload(&udpv4_offload, IPPROTO_UDP);
}