linux/net/ipv4/udp_offload.c
/*
 *      IPV4 GSO/GRO offload support
 *      Linux INET implementation
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      UDPv4 GSO support
 */

#include <linux/skbuff.h>
#include <net/udp.h>
#include <net/protocol.h>

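/* Registered per-port tunnel offloads (e.g. vxlan, fou) live in a singly
 * linked, RCU-protected list headed by udp_offload_base.  Additions and
 * removals are serialized by udp_offload_lock; the GRO receive path walks
 * the list under rcu_read_lock() and matches entries by destination port
 * and network namespace.
 */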
static DEFINE_SPINLOCK(udp_offload_lock);
static struct udp_offload_priv __rcu *udp_offload_base __read_mostly;

#define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock))

struct udp_offload_priv {
        struct udp_offload      *offload;
        possible_net_t  net;
        struct rcu_head         rcu;
        struct udp_offload_priv __rcu *next;
};

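/* __skb_udp_tunnel_segment() strips the outer (tunnel) headers, segments the
 * inner packet with the protocol-specific callback, and rebuilds the outer
 * headers on every resulting segment, fixing up the outer UDP length and
 * checksum.  Depending on the GSO flags, the outer checksum is offloaded
 * (CHECKSUM_PARTIAL), computed from scratch for remote checksum offload, or
 * derived via gso_make_checksum().
 */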
static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
        netdev_features_t features,
        struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
                                             netdev_features_t features),
        __be16 new_protocol, bool is_ipv6)
{
        struct sk_buff *segs = ERR_PTR(-EINVAL);
        u16 mac_offset = skb->mac_header;
        int mac_len = skb->mac_len;
        int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
        __be16 protocol = skb->protocol;
        netdev_features_t enc_features;
        int udp_offset, outer_hlen;
        unsigned int oldlen;
        bool need_csum = !!(skb_shinfo(skb)->gso_type &
                            SKB_GSO_UDP_TUNNEL_CSUM);
        bool remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
        bool offload_csum = false, dont_encap = (need_csum || remcsum);

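        /* Keep the one's complement of the original length; combining it
         * with each segment's new length below adjusts the outer UDP
         * checksum for the size change (the same trick TCP GSO uses).
         */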
        oldlen = (u16)~skb->len;

        if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
                goto out;

        skb->encapsulation = 0;
        __skb_pull(skb, tnl_hlen);
        skb_reset_mac_header(skb);
        skb_set_network_header(skb, skb_inner_network_offset(skb));
        skb->mac_len = skb_inner_network_offset(skb);
        skb->protocol = new_protocol;
        skb->encap_hdr_csum = need_csum;
        skb->remcsum_offload = remcsum;

        /* Try to offload checksum if possible */
        offload_csum = !!(need_csum &&
                          ((skb->dev->features & NETIF_F_HW_CSUM) ||
                           (skb->dev->features & (is_ipv6 ?
                            NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM))));

        /* segment inner packet. */
        enc_features = skb->dev->hw_enc_features & features;
        segs = gso_inner_segment(skb, enc_features);
        if (IS_ERR_OR_NULL(segs)) {
                skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
                                     mac_len);
                goto out;
        }

        outer_hlen = skb_tnl_header_len(skb);
        udp_offset = outer_hlen - tnl_hlen;
        skb = segs;
        do {
                struct udphdr *uh;
                int len;
                __be32 delta;

                if (dont_encap) {
                        skb->encapsulation = 0;
                        skb->ip_summed = CHECKSUM_NONE;
                } else {
                        /* Only set up inner headers if we might be offloading
                         * inner checksum.
                         */
                        skb_reset_inner_headers(skb);
                        skb->encapsulation = 1;
                }

                skb->mac_len = mac_len;
                skb->protocol = protocol;

                skb_push(skb, outer_hlen);
                skb_reset_mac_header(skb);
                skb_set_network_header(skb, mac_len);
                skb_set_transport_header(skb, udp_offset);
                len = skb->len - udp_offset;
                uh = udp_hdr(skb);
                uh->len = htons(len);

                if (!need_csum)
                        continue;

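                /* Adjust the outer UDP checksum for the new segment length;
                 * oldlen already holds the one's complement of the original
                 * length.
                 */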
                delta = htonl(oldlen + len);

                uh->check = ~csum_fold((__force __wsum)
                                       ((__force u32)uh->check +
                                        (__force u32)delta));
                if (offload_csum) {
                        skb->ip_summed = CHECKSUM_PARTIAL;
                        skb->csum_start = skb_transport_header(skb) - skb->head;
                        skb->csum_offset = offsetof(struct udphdr, check);
                } else if (remcsum) {
                        /* Need to calculate the checksum from scratch;
                         * inner checksums are never offloaded when doing
                         * remote checksum offload.
                         */

                        skb->csum = skb_checksum(skb, udp_offset,
                                                 skb->len - udp_offset,
                                                 0);
                        uh->check = csum_fold(skb->csum);
                        if (uh->check == 0)
                                uh->check = CSUM_MANGLED_0;
                } else {
                        uh->check = gso_make_checksum(skb, ~uh->check);

                        if (uh->check == 0)
                                uh->check = CSUM_MANGLED_0;
                }
        } while ((skb = skb->next));
out:
        return segs;
}

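/* skb_udp_tunnel_segment() selects the inner segmentation callback based on
 * how the tunnel recorded the inner protocol (an Ethernet frame or a raw IP
 * protocol number) and then hands off to __skb_udp_tunnel_segment().
 */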
struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
                                       netdev_features_t features,
                                       bool is_ipv6)
{
        __be16 protocol = skb->protocol;
        const struct net_offload **offloads;
        const struct net_offload *ops;
        struct sk_buff *segs = ERR_PTR(-EINVAL);
        struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
                                             netdev_features_t features);

        rcu_read_lock();

        switch (skb->inner_protocol_type) {
        case ENCAP_TYPE_ETHER:
                protocol = skb->inner_protocol;
                gso_inner_segment = skb_mac_gso_segment;
                break;
        case ENCAP_TYPE_IPPROTO:
                offloads = is_ipv6 ? inet6_offloads : inet_offloads;
                ops = rcu_dereference(offloads[skb->inner_ipproto]);
                if (!ops || !ops->callbacks.gso_segment)
                        goto out_unlock;
                gso_inner_segment = ops->callbacks.gso_segment;
                break;
        default:
                goto out_unlock;
        }

        segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
                                        protocol, is_ipv6);

out_unlock:
        rcu_read_unlock();

        return segs;
}

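/* udp4_ufo_fragment() handles both tunnel GSO (encapsulated skbs, delegated
 * to skb_udp_tunnel_segment()) and classic UFO.  For packets from untrusted
 * sources only gso_segs is recalculated; otherwise the UDP checksum is
 * completed in software and the skb is split with skb_segment(), the pieces
 * going out as IP fragments.
 */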
static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
                                         netdev_features_t features)
{
        struct sk_buff *segs = ERR_PTR(-EINVAL);
        unsigned int mss;
        __wsum csum;
        struct udphdr *uh;
        struct iphdr *iph;

        if (skb->encapsulation &&
            (skb_shinfo(skb)->gso_type &
             (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
                segs = skb_udp_tunnel_segment(skb, features, false);
                goto out;
        }

        if (!pskb_may_pull(skb, sizeof(struct udphdr)))
                goto out;

        mss = skb_shinfo(skb)->gso_size;
        if (unlikely(skb->len <= mss))
                goto out;

        if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
                /* Packet is from an untrusted source, reset gso_segs. */
                int type = skb_shinfo(skb)->gso_type;

                if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
                                      SKB_GSO_UDP_TUNNEL |
                                      SKB_GSO_UDP_TUNNEL_CSUM |
                                      SKB_GSO_TUNNEL_REMCSUM |
                                      SKB_GSO_IPIP |
                                      SKB_GSO_GRE | SKB_GSO_GRE_CSUM) ||
                             !(type & (SKB_GSO_UDP))))
                        goto out;

                skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);

                segs = NULL;
                goto out;
        }

        /* Do software UFO.  Complete and fill in the UDP checksum, as the
         * hardware cannot checksum a UDP packet that is sent as multiple
         * IP fragments.
         */

        uh = udp_hdr(skb);
        iph = ip_hdr(skb);

        uh->check = 0;
        csum = skb_checksum(skb, 0, skb->len, 0);
        uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
        if (uh->check == 0)
                uh->check = CSUM_MANGLED_0;

        skb->ip_summed = CHECKSUM_NONE;

        /* Fragment the skb. IP headers of the fragments are updated in
         * inet_gso_segment()
         */
        segs = skb_segment(skb, features);
out:
        return segs;
}

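/* udp_add_offload() links a new per-port offload at the head of the RCU list.
 * A tunnel driver would register roughly like this (a sketch only; the port
 * number and callback names are illustrative, not taken from an in-tree
 * user):
 *
 *      static struct udp_offload my_offload = {
 *              .port    = htons(4789),
 *              .ipproto = IPPROTO_UDP,
 *              .callbacks = {
 *                      .gro_receive  = my_gro_receive,
 *                      .gro_complete = my_gro_complete,
 *              },
 *      };
 *
 *      err = udp_add_offload(net, &my_offload);
 */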
int udp_add_offload(struct net *net, struct udp_offload *uo)
{
        struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC);

        if (!new_offload)
                return -ENOMEM;

        write_pnet(&new_offload->net, net);
        new_offload->offload = uo;

        spin_lock(&udp_offload_lock);
        new_offload->next = udp_offload_base;
        rcu_assign_pointer(udp_offload_base, new_offload);
        spin_unlock(&udp_offload_lock);

        return 0;
}
EXPORT_SYMBOL(udp_add_offload);

static void udp_offload_free_routine(struct rcu_head *head)
{
        struct udp_offload_priv *ou_priv = container_of(head, struct udp_offload_priv, rcu);
        kfree(ou_priv);
}

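/* udp_del_offload() unlinks the matching entry while holding the lock and
 * frees it only after an RCU grace period, so concurrent GRO lookups still
 * walking the list remain safe.
 */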
void udp_del_offload(struct udp_offload *uo)
{
        struct udp_offload_priv __rcu **head = &udp_offload_base;
        struct udp_offload_priv *uo_priv;

        spin_lock(&udp_offload_lock);

        uo_priv = udp_deref_protected(*head);
        for (; uo_priv != NULL;
             uo_priv = udp_deref_protected(*head)) {
                if (uo_priv->offload == uo) {
                        rcu_assign_pointer(*head,
                                           udp_deref_protected(uo_priv->next));
                        goto unlock;
                }
                head = &uo_priv->next;
        }
        pr_warn("udp_del_offload: didn't find offload for port %d\n", ntohs(uo->port));
unlock:
        spin_unlock(&udp_offload_lock);
        if (uo_priv)
                call_rcu(&uo_priv->rcu, udp_offload_free_routine);
}
EXPORT_SYMBOL(udp_del_offload);

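/* udp_gro_receive() is the protocol-independent GRO entry point for UDP
 * encapsulations.  Packets are only held for aggregation when a registered
 * offload matches the destination port (and netns); everything else is
 * flushed straight through.
 */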
struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
                                 struct udphdr *uh)
{
        struct udp_offload_priv *uo_priv;
        struct sk_buff *p, **pp = NULL;
        struct udphdr *uh2;
        unsigned int off = skb_gro_offset(skb);
        int flush = 1;

        if (NAPI_GRO_CB(skb)->udp_mark ||
            (skb->ip_summed != CHECKSUM_PARTIAL &&
             NAPI_GRO_CB(skb)->csum_cnt == 0 &&
             !NAPI_GRO_CB(skb)->csum_valid))
                goto out;

        /* mark that this skb passed once through the udp gro layer */
        NAPI_GRO_CB(skb)->udp_mark = 1;

        rcu_read_lock();
        uo_priv = rcu_dereference(udp_offload_base);
        for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
                if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) &&
                    uo_priv->offload->port == uh->dest &&
                    uo_priv->offload->callbacks.gro_receive)
                        goto unflush;
        }
        goto out_unlock;

unflush:
        flush = 0;

        for (p = *head; p; p = p->next) {
                if (!NAPI_GRO_CB(p)->same_flow)
                        continue;

                uh2 = (struct udphdr *)(p->data + off);

                /* Match ports, and require that the checksums are either
                 * both zero or both nonzero.
                 */
                if ((*(u32 *)&uh->source != *(u32 *)&uh2->source) ||
                    (!uh->check ^ !uh2->check)) {
                        NAPI_GRO_CB(p)->same_flow = 0;
                        continue;
                }
        }

        skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
        skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
        NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
        pp = uo_priv->offload->callbacks.gro_receive(head, skb,
                                                     uo_priv->offload);

out_unlock:
        rcu_read_unlock();
out:
        NAPI_GRO_CB(skb)->flush |= flush;
        return pp;
}

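/* IPv4 wrapper: validate the outer UDP checksum (a zero checksum is legal
 * for IPv4) or convert it to CHECKSUM_UNNECESSARY before entering the
 * protocol-independent udp_gro_receive().
 */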
static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
                                         struct sk_buff *skb)
{
        struct udphdr *uh = udp_gro_udphdr(skb);

        if (unlikely(!uh))
                goto flush;

        /* Don't bother verifying checksum if we're going to flush anyway. */
        if (NAPI_GRO_CB(skb)->flush)
                goto skip;

        if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
                                                 inet_gro_compute_pseudo))
                goto flush;
        else if (uh->check)
                skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
                                             inet_gro_compute_pseudo);
skip:
        NAPI_GRO_CB(skb)->is_ipv6 = 0;
        return udp_gro_receive(head, skb, uh);

flush:
        NAPI_GRO_CB(skb)->flush = 1;
        return NULL;
}

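/* udp_gro_complete() fixes up the outer UDP length of the merged skb, calls
 * the matching per-port gro_complete callback, and marks the result as an
 * encapsulated packet so it can be re-segmented by GSO later.
 */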
int udp_gro_complete(struct sk_buff *skb, int nhoff)
{
        struct udp_offload_priv *uo_priv;
        __be16 newlen = htons(skb->len - nhoff);
        struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
        int err = -ENOSYS;

        uh->len = newlen;

        rcu_read_lock();

        uo_priv = rcu_dereference(udp_offload_base);
        for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
                if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) &&
                    uo_priv->offload->port == uh->dest &&
                    uo_priv->offload->callbacks.gro_complete)
                        break;
        }

        if (uo_priv) {
                NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
                err = uo_priv->offload->callbacks.gro_complete(skb,
                                nhoff + sizeof(struct udphdr),
                                uo_priv->offload);
        }

        rcu_read_unlock();

        if (skb->remcsum_offload)
                skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM;

        skb->encapsulation = 1;
        skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr));

        return err;
}

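/* IPv4 wrapper: pick the tunnel GSO type based on whether the outer checksum
 * is in use and, if so, seed uh->check with the pseudo-header checksum before
 * completing the aggregated packet.
 */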
static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
{
        const struct iphdr *iph = ip_hdr(skb);
        struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);

        if (uh->check) {
                skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
                uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
                                          iph->daddr, 0);
        } else {
                skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
        }

        return udp_gro_complete(skb, nhoff);
}

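/* GSO/GRO callbacks for IPPROTO_UDP, registered at boot time through
 * udpv4_offload_init().
 */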
static const struct net_offload udpv4_offload = {
        .callbacks = {
                .gso_segment  = udp4_ufo_fragment,
                .gro_receive  = udp4_gro_receive,
                .gro_complete = udp4_gro_complete,
        },
};

int __init udpv4_offload_init(void)
{
        return inet_add_offload(&udpv4_offload, IPPROTO_UDP);
}