qemu/net/eth.c
<<
>>
Prefs
   1/*
   2 * QEMU network structures definitions and helper functions
   3 *
   4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
   5 *
   6 * Developed by Daynix Computing LTD (http://www.daynix.com)
   7 *
   8 * Authors:
   9 * Dmitry Fleytman <dmitry@daynix.com>
  10 * Tamir Shomer <tamirs@daynix.com>
  11 * Yan Vugenfirer <yan@daynix.com>
  12 *
  13 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  14 * See the COPYING file in the top-level directory.
  15 *
  16 */
  17
  18#include "qemu/osdep.h"
  19#include "net/eth.h"
  20#include "net/checksum.h"
  21#include "qemu-common.h"
  22#include "net/tap.h"
  23
  24void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
  25    uint16_t vlan_ethtype, bool *is_new)
  26{
  27    struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
  28
  29    switch (be16_to_cpu(ehdr->h_proto)) {
  30    case ETH_P_VLAN:
  31    case ETH_P_DVLAN:
  32        /* vlan hdr exists */
  33        *is_new = false;
  34        break;
  35
  36    default:
  37        /* No VLAN header, put a new one */
  38        vhdr->h_proto = ehdr->h_proto;
  39        ehdr->h_proto = cpu_to_be16(vlan_ethtype);
  40        *is_new = true;
  41        break;
  42    }
  43    vhdr->h_tci = cpu_to_be16(vlan_tag);
  44}
  45
  46uint8_t
  47eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
  48{
  49    uint8_t ecn_state = 0;
  50
  51    if (l3_proto == ETH_P_IP) {
  52        struct ip_header *iphdr = (struct ip_header *) l3_hdr;
  53
  54        if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
  55            if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) {
  56                ecn_state = VIRTIO_NET_HDR_GSO_ECN;
  57            }
  58            if (l4proto == IP_PROTO_TCP) {
  59                return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
  60            } else if (l4proto == IP_PROTO_UDP) {
  61                return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
  62            }
  63        }
  64    } else if (l3_proto == ETH_P_IPV6) {
  65        struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;
  66
  67        if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) {
  68            ecn_state = VIRTIO_NET_HDR_GSO_ECN;
  69        }
  70
  71        if (l4proto == IP_PROTO_TCP) {
  72            return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
  73        }
  74    }
  75
  76    /* Unsupported offload */
  77    g_assert_not_reached();
  78
  79    return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
  80}
  81
  82uint16_t
  83eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
  84{
  85    uint16_t proto;
  86    size_t copied;
  87    size_t size = iov_size(l2hdr_iov, iovcnt);
  88    size_t proto_offset = l2hdr_len - sizeof(proto);
  89
  90    if (size < proto_offset) {
  91        return ETH_P_UNKNOWN;
  92    }
  93
  94    copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
  95                        &proto, sizeof(proto));
  96
  97    return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
  98}
  99
 100static bool
 101_eth_copy_chunk(size_t input_size,
 102                const struct iovec *iov, int iovcnt,
 103                size_t offset, size_t length,
 104                void *buffer)
 105{
 106    size_t copied;
 107
 108    if (input_size < offset) {
 109        return false;
 110    }
 111
 112    copied = iov_to_buf(iov, iovcnt, offset, buffer, length);
 113
 114    if (copied < length) {
 115        return false;
 116    }
 117
 118    return true;
 119}
 120
 121static bool
 122_eth_tcp_has_data(bool is_ip4,
 123                  const struct ip_header  *ip4_hdr,
 124                  const struct ip6_header *ip6_hdr,
 125                  size_t full_ip6hdr_len,
 126                  const struct tcp_header *tcp)
 127{
 128    uint32_t l4len;
 129
 130    if (is_ip4) {
 131        l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
 132    } else {
 133        size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
 134        l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
 135    }
 136
 137    return l4len > TCP_HEADER_DATA_OFFSET(tcp);
 138}
 139
 140void eth_get_protocols(const struct iovec *iov, int iovcnt,
 141                       bool *isip4, bool *isip6,
 142                       bool *isudp, bool *istcp,
 143                       size_t *l3hdr_off,
 144                       size_t *l4hdr_off,
 145                       size_t *l5hdr_off,
 146                       eth_ip6_hdr_info *ip6hdr_info,
 147                       eth_ip4_hdr_info *ip4hdr_info,
 148                       eth_l4_hdr_info  *l4hdr_info)
 149{
 150    int proto;
 151    bool fragment = false;
 152    size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt);
 153    size_t input_size = iov_size(iov, iovcnt);
 154    size_t copied;
 155
 156    *isip4 = *isip6 = *isudp = *istcp = false;
 157
 158    proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len);
 159
 160    *l3hdr_off = l2hdr_len;
 161
 162    if (proto == ETH_P_IP) {
 163        struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;
 164
 165        if (input_size < l2hdr_len) {
 166            return;
 167        }
 168
 169        copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr));
 170
 171        *isip4 = true;
 172
 173        if (copied < sizeof(*iphdr)) {
 174            return;
 175        }
 176
 177        if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
 178            if (iphdr->ip_p == IP_PROTO_TCP) {
 179                *istcp = true;
 180            } else if (iphdr->ip_p == IP_PROTO_UDP) {
 181                *isudp = true;
 182            }
 183        }
 184
 185        ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
 186        *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr);
 187
 188        fragment = ip4hdr_info->fragment;
 189    } else if (proto == ETH_P_IPV6) {
 190
 191        *isip6 = true;
 192        if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len,
 193                               ip6hdr_info)) {
 194            if (ip6hdr_info->l4proto == IP_PROTO_TCP) {
 195                *istcp = true;
 196            } else if (ip6hdr_info->l4proto == IP_PROTO_UDP) {
 197                *isudp = true;
 198            }
 199        } else {
 200            return;
 201        }
 202
 203        *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len;
 204        fragment = ip6hdr_info->fragment;
 205    }
 206
 207    if (!fragment) {
 208        if (*istcp) {
 209            *istcp = _eth_copy_chunk(input_size,
 210                                     iov, iovcnt,
 211                                     *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
 212                                     &l4hdr_info->hdr.tcp);
 213
 214            if (*istcp) {
 215                *l5hdr_off = *l4hdr_off +
 216                    TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);
 217
 218                l4hdr_info->has_tcp_data =
 219                    _eth_tcp_has_data(proto == ETH_P_IP,
 220                                      &ip4hdr_info->ip4_hdr,
 221                                      &ip6hdr_info->ip6_hdr,
 222                                      *l4hdr_off - *l3hdr_off,
 223                                      &l4hdr_info->hdr.tcp);
 224            }
 225        } else if (*isudp) {
 226            *isudp = _eth_copy_chunk(input_size,
 227                                     iov, iovcnt,
 228                                     *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
 229                                     &l4hdr_info->hdr.udp);
 230            *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
 231        }
 232    }
 233}
 234
 235size_t
 236eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
 237               uint8_t *new_ehdr_buf,
 238               uint16_t *payload_offset, uint16_t *tci)
 239{
 240    struct vlan_header vlan_hdr;
 241    struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
 242
 243    size_t copied = iov_to_buf(iov, iovcnt, iovoff,
 244                               new_ehdr, sizeof(*new_ehdr));
 245
 246    if (copied < sizeof(*new_ehdr)) {
 247        return 0;
 248    }
 249
 250    switch (be16_to_cpu(new_ehdr->h_proto)) {
 251    case ETH_P_VLAN:
 252    case ETH_P_DVLAN:
 253        copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
 254                            &vlan_hdr, sizeof(vlan_hdr));
 255
 256        if (copied < sizeof(vlan_hdr)) {
 257            return 0;
 258        }
 259
 260        new_ehdr->h_proto = vlan_hdr.h_proto;
 261
 262        *tci = be16_to_cpu(vlan_hdr.h_tci);
 263        *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
 264
 265        if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {
 266
 267            copied = iov_to_buf(iov, iovcnt, *payload_offset,
 268                                PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));
 269
 270            if (copied < sizeof(vlan_hdr)) {
 271                return 0;
 272            }
 273
 274            *payload_offset += sizeof(vlan_hdr);
 275
 276            return sizeof(struct eth_header) + sizeof(struct vlan_header);
 277        } else {
 278            return sizeof(struct eth_header);
 279        }
 280    default:
 281        return 0;
 282    }
 283}
 284
 285size_t
 286eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
 287                  uint16_t vet, uint8_t *new_ehdr_buf,
 288                  uint16_t *payload_offset, uint16_t *tci)
 289{
 290    struct vlan_header vlan_hdr;
 291    struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
 292
 293    size_t copied = iov_to_buf(iov, iovcnt, iovoff,
 294                               new_ehdr, sizeof(*new_ehdr));
 295
 296    if (copied < sizeof(*new_ehdr)) {
 297        return 0;
 298    }
 299
 300    if (be16_to_cpu(new_ehdr->h_proto) == vet) {
 301        copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
 302                            &vlan_hdr, sizeof(vlan_hdr));
 303
 304        if (copied < sizeof(vlan_hdr)) {
 305            return 0;
 306        }
 307
 308        new_ehdr->h_proto = vlan_hdr.h_proto;
 309
 310        *tci = be16_to_cpu(vlan_hdr.h_tci);
 311        *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
 312        return sizeof(struct eth_header);
 313    }
 314
 315    return 0;
 316}
 317
 318void
 319eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
 320                            void *l3hdr, size_t l3hdr_len,
 321                            size_t l3payload_len,
 322                            size_t frag_offset, bool more_frags)
 323{
 324    const struct iovec l2vec = {
 325        .iov_base = (void *) l2hdr,
 326        .iov_len = l2hdr_len
 327    };
 328
 329    if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) {
 330        uint16_t orig_flags;
 331        struct ip_header *iphdr = (struct ip_header *) l3hdr;
 332        uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE;
 333        uint16_t new_ip_off;
 334
 335        assert(frag_offset % IP_FRAG_UNIT_SIZE == 0);
 336        assert((frag_off_units & ~IP_OFFMASK) == 0);
 337
 338        orig_flags = be16_to_cpu(iphdr->ip_off) & ~(IP_OFFMASK|IP_MF);
 339        new_ip_off = frag_off_units | orig_flags  | (more_frags ? IP_MF : 0);
 340        iphdr->ip_off = cpu_to_be16(new_ip_off);
 341        iphdr->ip_len = cpu_to_be16(l3payload_len + l3hdr_len);
 342    }
 343}
 344
 345void
 346eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
 347{
 348    struct ip_header *iphdr = (struct ip_header *) l3hdr;
 349    iphdr->ip_sum = 0;
 350    iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len));
 351}
 352
 353uint32_t
 354eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
 355                             uint16_t csl,
 356                             uint32_t *cso)
 357{
 358    struct ip_pseudo_header ipph;
 359    ipph.ip_src = iphdr->ip_src;
 360    ipph.ip_dst = iphdr->ip_dst;
 361    ipph.ip_payload = cpu_to_be16(csl);
 362    ipph.ip_proto = iphdr->ip_p;
 363    ipph.zeros = 0;
 364    *cso = sizeof(ipph);
 365    return net_checksum_add(*cso, (uint8_t *) &ipph);
 366}
 367
 368uint32_t
 369eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
 370                             uint16_t csl,
 371                             uint8_t l4_proto,
 372                             uint32_t *cso)
 373{
 374    struct ip6_pseudo_header ipph;
 375    ipph.ip6_src = iphdr->ip6_src;
 376    ipph.ip6_dst = iphdr->ip6_dst;
 377    ipph.len = cpu_to_be16(csl);
 378    ipph.zero[0] = 0;
 379    ipph.zero[1] = 0;
 380    ipph.zero[2] = 0;
 381    ipph.next_hdr = l4_proto;
 382    *cso = sizeof(ipph);
 383    return net_checksum_add(*cso, (uint8_t *)&ipph);
 384}
 385
 386static bool
 387eth_is_ip6_extension_header_type(uint8_t hdr_type)
 388{
 389    switch (hdr_type) {
 390    case IP6_HOP_BY_HOP:
 391    case IP6_ROUTING:
 392    case IP6_FRAGMENT:
 393    case IP6_ESP:
 394    case IP6_AUTHENTICATION:
 395    case IP6_DESTINATON:
 396    case IP6_MOBILITY:
 397        return true;
 398    default:
 399        return false;
 400    }
 401}
 402
 403static bool
 404_eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
 405                        size_t rthdr_offset,
 406                        struct ip6_ext_hdr *ext_hdr,
 407                        struct in6_address *dst_addr)
 408{
 409    struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr;
 410
 411    if ((rthdr->rtype == 2) &&
 412        (rthdr->len == sizeof(struct in6_address) / 8) &&
 413        (rthdr->segleft == 1)) {
 414
 415        size_t input_size = iov_size(pkt, pkt_frags);
 416        size_t bytes_read;
 417
 418        if (input_size < rthdr_offset + sizeof(*ext_hdr)) {
 419            return false;
 420        }
 421
 422        bytes_read = iov_to_buf(pkt, pkt_frags,
 423                                rthdr_offset + sizeof(*ext_hdr),
 424                                dst_addr, sizeof(*dst_addr));
 425
 426        return bytes_read == sizeof(*dst_addr);
 427    }
 428
 429    return false;
 430}
 431
 432static bool
 433_eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
 434                        size_t dsthdr_offset,
 435                        struct ip6_ext_hdr *ext_hdr,
 436                        struct in6_address *src_addr)
 437{
 438    size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
 439    struct ip6_option_hdr opthdr;
 440    size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);
 441
 442    while (bytes_left > sizeof(opthdr)) {
 443        size_t input_size = iov_size(pkt, pkt_frags);
 444        size_t bytes_read, optlen;
 445
 446        if (input_size < opt_offset) {
 447            return false;
 448        }
 449
 450        bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
 451                                &opthdr, sizeof(opthdr));
 452
 453        if (bytes_read != sizeof(opthdr)) {
 454            return false;
 455        }
 456
 457        optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
 458                                               : (opthdr.len + sizeof(opthdr));
 459
 460        if (optlen > bytes_left) {
 461            return false;
 462        }
 463
 464        if (opthdr.type == IP6_OPT_HOME) {
 465            size_t input_size = iov_size(pkt, pkt_frags);
 466
 467            if (input_size < opt_offset + sizeof(opthdr)) {
 468                return false;
 469            }
 470
 471            bytes_read = iov_to_buf(pkt, pkt_frags,
 472                                    opt_offset + sizeof(opthdr),
 473                                    src_addr, sizeof(*src_addr));
 474
 475            return bytes_read == sizeof(*src_addr);
 476        }
 477
 478        opt_offset += optlen;
 479        bytes_left -= optlen;
 480    }
 481
 482    return false;
 483}
 484
 485bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
 486                        size_t ip6hdr_off, eth_ip6_hdr_info *info)
 487{
 488    struct ip6_ext_hdr ext_hdr;
 489    size_t bytes_read;
 490    uint8_t curr_ext_hdr_type;
 491    size_t input_size = iov_size(pkt, pkt_frags);
 492
 493    info->rss_ex_dst_valid = false;
 494    info->rss_ex_src_valid = false;
 495    info->fragment = false;
 496
 497    if (input_size < ip6hdr_off) {
 498        return false;
 499    }
 500
 501    bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
 502                            &info->ip6_hdr, sizeof(info->ip6_hdr));
 503    if (bytes_read < sizeof(info->ip6_hdr)) {
 504        return false;
 505    }
 506
 507    info->full_hdr_len = sizeof(struct ip6_header);
 508
 509    curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;
 510
 511    if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
 512        info->l4proto = info->ip6_hdr.ip6_nxt;
 513        info->has_ext_hdrs = false;
 514        return true;
 515    }
 516
 517    info->has_ext_hdrs = true;
 518
 519    do {
 520        if (input_size < ip6hdr_off + info->full_hdr_len) {
 521            return false;
 522        }
 523
 524        bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
 525                                &ext_hdr, sizeof(ext_hdr));
 526
 527        if (bytes_read < sizeof(ext_hdr)) {
 528            return false;
 529        }
 530
 531        if (curr_ext_hdr_type == IP6_ROUTING) {
 532            info->rss_ex_dst_valid =
 533                _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
 534                                         ip6hdr_off + info->full_hdr_len,
 535                                         &ext_hdr, &info->rss_ex_dst);
 536        } else if (curr_ext_hdr_type == IP6_DESTINATON) {
 537            info->rss_ex_src_valid =
 538                _eth_get_rss_ex_src_addr(pkt, pkt_frags,
 539                                         ip6hdr_off + info->full_hdr_len,
 540                                         &ext_hdr, &info->rss_ex_src);
 541        } else if (curr_ext_hdr_type == IP6_FRAGMENT) {
 542            info->fragment = true;
 543        }
 544
 545        info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
 546        curr_ext_hdr_type = ext_hdr.ip6r_nxt;
 547    } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));
 548
 549    info->l4proto = ext_hdr.ip6r_nxt;
 550    return true;
 551}
 552