/* qemu/net/eth.c */
   1/*
   2 * QEMU network structures definitions and helper functions
   3 *
   4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
   5 *
   6 * Developed by Daynix Computing LTD (http://www.daynix.com)
   7 *
   8 * Authors:
   9 * Dmitry Fleytman <dmitry@daynix.com>
  10 * Tamir Shomer <tamirs@daynix.com>
  11 * Yan Vugenfirer <yan@daynix.com>
  12 *
  13 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  14 * See the COPYING file in the top-level directory.
  15 *
  16 */
  17
  18#include "qemu/osdep.h"
  19#include "qemu/log.h"
  20#include "net/eth.h"
  21#include "net/checksum.h"
  22#include "net/tap.h"
  23
  24void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
  25    uint16_t vlan_ethtype, bool *is_new)
  26{
  27    struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
  28
  29    switch (be16_to_cpu(ehdr->h_proto)) {
  30    case ETH_P_VLAN:
  31    case ETH_P_DVLAN:
  32        /* vlan hdr exists */
  33        *is_new = false;
  34        break;
  35
  36    default:
  37        /* No VLAN header, put a new one */
  38        vhdr->h_proto = ehdr->h_proto;
  39        ehdr->h_proto = cpu_to_be16(vlan_ethtype);
  40        *is_new = true;
  41        break;
  42    }
  43    vhdr->h_tci = cpu_to_be16(vlan_tag);
  44}
  45
  46uint8_t
  47eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
  48{
  49    uint8_t ecn_state = 0;
  50
  51    if (l3_proto == ETH_P_IP) {
  52        struct ip_header *iphdr = (struct ip_header *) l3_hdr;
  53
  54        if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
  55            if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) {
  56                ecn_state = VIRTIO_NET_HDR_GSO_ECN;
  57            }
  58            if (l4proto == IP_PROTO_TCP) {
  59                return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
  60            } else if (l4proto == IP_PROTO_UDP) {
  61                return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
  62            }
  63        }
  64    } else if (l3_proto == ETH_P_IPV6) {
  65        struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;
  66
  67        if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) {
  68            ecn_state = VIRTIO_NET_HDR_GSO_ECN;
  69        }
  70
  71        if (l4proto == IP_PROTO_TCP) {
  72            return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
  73        }
  74    }
  75    qemu_log_mask(LOG_UNIMP, "%s: probably not GSO frame, "
  76        "unknown L3 protocol: 0x%04"PRIx16"\n", __func__, l3_proto);
  77
  78    return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
  79}
  80
  81uint16_t
  82eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
  83{
  84    uint16_t proto;
  85    size_t copied;
  86    size_t size = iov_size(l2hdr_iov, iovcnt);
  87    size_t proto_offset = l2hdr_len - sizeof(proto);
  88
  89    if (size < proto_offset) {
  90        return ETH_P_UNKNOWN;
  91    }
  92
  93    copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
  94                        &proto, sizeof(proto));
  95
  96    return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
  97}
  98
  99static bool
 100_eth_copy_chunk(size_t input_size,
 101                const struct iovec *iov, int iovcnt,
 102                size_t offset, size_t length,
 103                void *buffer)
 104{
 105    size_t copied;
 106
 107    if (input_size < offset) {
 108        return false;
 109    }
 110
 111    copied = iov_to_buf(iov, iovcnt, offset, buffer, length);
 112
 113    if (copied < length) {
 114        return false;
 115    }
 116
 117    return true;
 118}
 119
 120static bool
 121_eth_tcp_has_data(bool is_ip4,
 122                  const struct ip_header  *ip4_hdr,
 123                  const struct ip6_header *ip6_hdr,
 124                  size_t full_ip6hdr_len,
 125                  const struct tcp_header *tcp)
 126{
 127    uint32_t l4len;
 128
 129    if (is_ip4) {
 130        l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
 131    } else {
 132        size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
 133        l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
 134    }
 135
 136    return l4len > TCP_HEADER_DATA_OFFSET(tcp);
 137}
 138
/*
 * Parse the L2/L3/L4 headers of a packet stored in an iovec.
 *
 * On return, *hasip4 / *hasip6 report which IP version (if any) was
 * found; *l3hdr_off, *l4hdr_off and *l5hdr_off are byte offsets from
 * the start of the iovec to the network, transport and payload data.
 * The ip4/ip6/l4 info structures receive copies of the parsed headers.
 * l4hdr_info->proto remains ETH_L4_HDR_PROTO_INVALID unless a complete
 * TCP or UDP header could be read.
 *
 * NOTE(review): *l4hdr_off and *l5hdr_off are only written on the paths
 * that reach the corresponding parse stage — callers must not read them
 * unless the matching "has" flags / proto say the stage succeeded.
 */
void eth_get_protocols(const struct iovec *iov, size_t iovcnt, size_t iovoff,
                       bool *hasip4, bool *hasip6,
                       size_t *l3hdr_off,
                       size_t *l4hdr_off,
                       size_t *l5hdr_off,
                       eth_ip6_hdr_info *ip6hdr_info,
                       eth_ip4_hdr_info *ip4hdr_info,
                       eth_l4_hdr_info  *l4hdr_info)
{
    int proto;
    bool fragment = false;
    size_t input_size = iov_size(iov, iovcnt);
    size_t copied;
    uint8_t ip_p;

    *hasip4 = *hasip6 = false;
    /* L3 begins after the Ethernet header, including any VLAN tags. */
    *l3hdr_off = iovoff + eth_get_l2_hdr_length_iov(iov, iovcnt, iovoff);
    l4hdr_info->proto = ETH_L4_HDR_PROTO_INVALID;

    proto = eth_get_l3_proto(iov, iovcnt, *l3hdr_off);

    if (proto == ETH_P_IP) {
        struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;

        if (input_size < *l3hdr_off) {
            return;
        }

        copied = iov_to_buf(iov, iovcnt, *l3hdr_off, iphdr, sizeof(*iphdr));
        if (copied < sizeof(*iphdr) ||
            IP_HEADER_VERSION(iphdr) != IP_HEADER_VERSION_4) {
            /* Truncated header or ethertype/version mismatch. */
            return;
        }

        *hasip4 = true;
        ip_p = iphdr->ip_p;
        ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
        /* IHL gives the true IPv4 header length, options included. */
        *l4hdr_off = *l3hdr_off + IP_HDR_GET_LEN(iphdr);

        fragment = ip4hdr_info->fragment;
    } else if (proto == ETH_P_IPV6) {
        if (!eth_parse_ipv6_hdr(iov, iovcnt, *l3hdr_off, ip6hdr_info)) {
            return;
        }

        *hasip6 = true;
        ip_p = ip6hdr_info->l4proto;
        *l4hdr_off = *l3hdr_off + ip6hdr_info->full_hdr_len;
        fragment = ip6hdr_info->fragment;
    } else {
        return;
    }

    /* Fragmented packets are not parsed beyond L3. */
    if (fragment) {
        return;
    }

    switch (ip_p) {
    case IP_PROTO_TCP:
        if (_eth_copy_chunk(input_size,
                            iov, iovcnt,
                            *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
                            &l4hdr_info->hdr.tcp)) {
            l4hdr_info->proto = ETH_L4_HDR_PROTO_TCP;
            /* TCP data offset accounts for TCP options. */
            *l5hdr_off = *l4hdr_off +
                TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);

            l4hdr_info->has_tcp_data =
                _eth_tcp_has_data(proto == ETH_P_IP,
                                  &ip4hdr_info->ip4_hdr,
                                  &ip6hdr_info->ip6_hdr,
                                  *l4hdr_off - *l3hdr_off,
                                  &l4hdr_info->hdr.tcp);
        }
        break;

    case IP_PROTO_UDP:
        if (_eth_copy_chunk(input_size,
                            iov, iovcnt,
                            *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
                            &l4hdr_info->hdr.udp)) {
            l4hdr_info->proto = ETH_L4_HDR_PROTO_UDP;
            *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
        }
        break;
    }
}
 226
/*
 * Copy the Ethernet header out of 'iov' with its outermost VLAN tag
 * removed.
 *
 * new_ehdr_buf receives the stripped header; it must be large enough
 * for an Ethernet header plus one VLAN header, since in the
 * double-tagged case the inner tag is copied in right behind it.  On
 * success, *tci holds the removed tag's TCI in host order and
 * *payload_offset is the iovec offset of the data following the
 * header(s) that were consumed from the input.
 *
 * Returns the number of bytes written to new_ehdr_buf, or 0 when the
 * frame is untagged or truncated.
 */
size_t
eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
               uint8_t *new_ehdr_buf,
               uint16_t *payload_offset, uint16_t *tci)
{
    struct vlan_header vlan_hdr;
    struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;

    size_t copied = iov_to_buf(iov, iovcnt, iovoff,
                               new_ehdr, sizeof(*new_ehdr));

    if (copied < sizeof(*new_ehdr)) {
        return 0;
    }

    switch (be16_to_cpu(new_ehdr->h_proto)) {
    case ETH_P_VLAN:
    case ETH_P_DVLAN:
        /* Read the outer VLAN header that follows the Ethernet header. */
        copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
                            &vlan_hdr, sizeof(vlan_hdr));

        if (copied < sizeof(vlan_hdr)) {
            return 0;
        }

        /* The tag's inner ethertype becomes the frame's ethertype. */
        new_ehdr->h_proto = vlan_hdr.h_proto;

        *tci = be16_to_cpu(vlan_hdr.h_tci);
        *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);

        if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {

            /* Double-tagged: keep the inner tag behind the copied header. */
            copied = iov_to_buf(iov, iovcnt, *payload_offset,
                                PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));

            if (copied < sizeof(vlan_hdr)) {
                return 0;
            }

            *payload_offset += sizeof(vlan_hdr);

            return sizeof(struct eth_header) + sizeof(struct vlan_header);
        } else {
            return sizeof(struct eth_header);
        }
    default:
        /* Untagged frame: nothing to strip. */
        return 0;
    }
}
 276
 277size_t
 278eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
 279                  uint16_t vet, uint8_t *new_ehdr_buf,
 280                  uint16_t *payload_offset, uint16_t *tci)
 281{
 282    struct vlan_header vlan_hdr;
 283    struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
 284
 285    size_t copied = iov_to_buf(iov, iovcnt, iovoff,
 286                               new_ehdr, sizeof(*new_ehdr));
 287
 288    if (copied < sizeof(*new_ehdr)) {
 289        return 0;
 290    }
 291
 292    if (be16_to_cpu(new_ehdr->h_proto) == vet) {
 293        copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
 294                            &vlan_hdr, sizeof(vlan_hdr));
 295
 296        if (copied < sizeof(vlan_hdr)) {
 297            return 0;
 298        }
 299
 300        new_ehdr->h_proto = vlan_hdr.h_proto;
 301
 302        *tci = be16_to_cpu(vlan_hdr.h_tci);
 303        *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
 304        return sizeof(struct eth_header);
 305    }
 306
 307    return 0;
 308}
 309
 310void
 311eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
 312{
 313    struct ip_header *iphdr = (struct ip_header *) l3hdr;
 314    iphdr->ip_sum = 0;
 315    iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len));
 316}
 317
 318uint32_t
 319eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
 320                             uint16_t csl,
 321                             uint32_t *cso)
 322{
 323    struct ip_pseudo_header ipph;
 324    ipph.ip_src = iphdr->ip_src;
 325    ipph.ip_dst = iphdr->ip_dst;
 326    ipph.ip_payload = cpu_to_be16(csl);
 327    ipph.ip_proto = iphdr->ip_p;
 328    ipph.zeros = 0;
 329    *cso = sizeof(ipph);
 330    return net_checksum_add(*cso, (uint8_t *) &ipph);
 331}
 332
 333uint32_t
 334eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
 335                             uint16_t csl,
 336                             uint8_t l4_proto,
 337                             uint32_t *cso)
 338{
 339    struct ip6_pseudo_header ipph;
 340    ipph.ip6_src = iphdr->ip6_src;
 341    ipph.ip6_dst = iphdr->ip6_dst;
 342    ipph.len = cpu_to_be16(csl);
 343    ipph.zero[0] = 0;
 344    ipph.zero[1] = 0;
 345    ipph.zero[2] = 0;
 346    ipph.next_hdr = l4_proto;
 347    *cso = sizeof(ipph);
 348    return net_checksum_add(*cso, (uint8_t *)&ipph);
 349}
 350
 351static bool
 352eth_is_ip6_extension_header_type(uint8_t hdr_type)
 353{
 354    switch (hdr_type) {
 355    case IP6_HOP_BY_HOP:
 356    case IP6_ROUTING:
 357    case IP6_FRAGMENT:
 358    case IP6_AUTHENTICATION:
 359    case IP6_DESTINATON:
 360    case IP6_MOBILITY:
 361        return true;
 362    default:
 363        return false;
 364    }
 365}
 366
 367static bool
 368_eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
 369                        size_t ext_hdr_offset,
 370                        struct ip6_ext_hdr *ext_hdr,
 371                        struct in6_address *dst_addr)
 372{
 373    struct ip6_ext_hdr_routing rt_hdr;
 374    size_t input_size = iov_size(pkt, pkt_frags);
 375    size_t bytes_read;
 376
 377    if (input_size < ext_hdr_offset + sizeof(rt_hdr) + sizeof(*dst_addr)) {
 378        return false;
 379    }
 380
 381    bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset,
 382                            &rt_hdr, sizeof(rt_hdr));
 383    assert(bytes_read == sizeof(rt_hdr));
 384    if ((rt_hdr.rtype != 2) || (rt_hdr.segleft != 1)) {
 385        return false;
 386    }
 387    bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset + sizeof(rt_hdr),
 388                            dst_addr, sizeof(*dst_addr));
 389    assert(bytes_read == sizeof(*dst_addr));
 390
 391    return true;
 392}
 393
/*
 * Scan an IPv6 destination options extension header for a home-address
 * option (IP6_OPT_HOME) and, if found, copy the address it carries into
 * *src_addr.
 *
 * dsthdr_offset is the iovec offset of the extension header; ext_hdr is
 * the already-read fixed part of that header (only its length field is
 * used here).  Returns true only when a complete address was read.
 */
static bool
_eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
                        size_t dsthdr_offset,
                        struct ip6_ext_hdr *ext_hdr,
                        struct in6_address *src_addr)
{
    /* Extension header length is in 8-byte units and excludes the
     * first 8 bytes; subtract the fixed part already consumed. */
    size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
    struct ip6_option_hdr opthdr;
    size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);

    while (bytes_left > sizeof(opthdr)) {
        size_t input_size = iov_size(pkt, pkt_frags);
        size_t bytes_read, optlen;

        if (input_size < opt_offset) {
            return false;
        }

        bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
                                &opthdr, sizeof(opthdr));

        if (bytes_read != sizeof(opthdr)) {
            return false;
        }

        /* Pad1 is a single byte with no length field; every other
         * option is TLV-encoded (len excludes the 2-byte option header). */
        optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
                                               : (opthdr.len + sizeof(opthdr));

        /* Reject options claiming to extend past this extension header;
         * this also keeps the bytes_left subtraction from underflowing. */
        if (optlen > bytes_left) {
            return false;
        }

        if (opthdr.type == IP6_OPT_HOME) {
            size_t input_size = iov_size(pkt, pkt_frags);

            if (input_size < opt_offset + sizeof(opthdr)) {
                return false;
            }

            bytes_read = iov_to_buf(pkt, pkt_frags,
                                    opt_offset + sizeof(opthdr),
                                    src_addr, sizeof(*src_addr));

            return bytes_read == sizeof(*src_addr);
        }

        opt_offset += optlen;
        bytes_left -= optlen;
    }

    /* No home-address option present. */
    return false;
}
 446
/*
 * Parse an IPv6 header (and any chain of known extension headers)
 * starting at ip6hdr_off in the iovec, filling *info.
 *
 * On success: info->ip6_hdr holds the fixed header, info->full_hdr_len
 * the total length including extension headers, info->l4proto the
 * next-header value after the last extension header, info->fragment is
 * set when a fragment header was seen, and the rss_ex_{src,dst} fields
 * are populated from routing / destination-option headers when present.
 *
 * Returns false when the input is truncated.
 */
bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
                        size_t ip6hdr_off, eth_ip6_hdr_info *info)
{
    struct ip6_ext_hdr ext_hdr;
    size_t bytes_read;
    uint8_t curr_ext_hdr_type;
    size_t input_size = iov_size(pkt, pkt_frags);

    info->rss_ex_dst_valid = false;
    info->rss_ex_src_valid = false;
    info->fragment = false;

    if (input_size < ip6hdr_off) {
        return false;
    }

    bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
                            &info->ip6_hdr, sizeof(info->ip6_hdr));
    if (bytes_read < sizeof(info->ip6_hdr)) {
        return false;
    }

    info->full_hdr_len = sizeof(struct ip6_header);

    curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;

    /* Fast path: no extension headers, next-header is the L4 protocol. */
    if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
        info->l4proto = info->ip6_hdr.ip6_nxt;
        info->has_ext_hdrs = false;
        return true;
    }

    info->has_ext_hdrs = true;

    /* Walk the extension header chain, accumulating full_hdr_len. */
    do {
        if (input_size < ip6hdr_off + info->full_hdr_len) {
            return false;
        }

        bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
                                &ext_hdr, sizeof(ext_hdr));

        if (bytes_read < sizeof(ext_hdr)) {
            return false;
        }

        if (curr_ext_hdr_type == IP6_ROUTING) {
            /* Only a routing header sized for exactly one address can
             * carry the RSS destination. */
            if (ext_hdr.ip6r_len == sizeof(struct in6_address) / 8) {
                info->rss_ex_dst_valid =
                    _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
                                             ip6hdr_off + info->full_hdr_len,
                                             &ext_hdr, &info->rss_ex_dst);
            }
        } else if (curr_ext_hdr_type == IP6_DESTINATON) {
            info->rss_ex_src_valid =
                _eth_get_rss_ex_src_addr(pkt, pkt_frags,
                                         ip6hdr_off + info->full_hdr_len,
                                         &ext_hdr, &info->rss_ex_src);
        } else if (curr_ext_hdr_type == IP6_FRAGMENT) {
            info->fragment = true;
        }

        /* Extension header lengths are in 8-byte granules, first excluded. */
        info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
        curr_ext_hdr_type = ext_hdr.ip6r_nxt;
    } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));

    info->l4proto = ext_hdr.ip6r_nxt;
    return true;
}
 516
 517bool eth_pad_short_frame(uint8_t *padded_pkt, size_t *padded_buflen,
 518                         const void *pkt, size_t pkt_size)
 519{
 520    assert(padded_buflen && *padded_buflen >= ETH_ZLEN);
 521
 522    if (pkt_size >= ETH_ZLEN) {
 523        return false;
 524    }
 525
 526    /* pad to minimum Ethernet frame length */
 527    memcpy(padded_pkt, pkt, pkt_size);
 528    memset(&padded_pkt[pkt_size], 0, ETH_ZLEN - pkt_size);
 529    *padded_buflen = ETH_ZLEN;
 530
 531    return true;
 532}
 533