qemu/net/eth.c
/*
 * QEMU network structures definitions and helper functions
 *
 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
 *
 * Developed by Daynix Computing LTD (http://www.daynix.com)
 *
 * Authors:
 * Dmitry Fleytman <dmitry@daynix.com>
 * Tamir Shomer <tamirs@daynix.com>
 * Yan Vugenfirer <yan@daynix.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "net/eth.h"
#include "net/checksum.h"
#include "net/tap.h"

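/*
 * Write an 802.1Q tag into the frame headed by 'ehdr'.  If the frame
 * already carries a VLAN/DVLAN ethertype, only the TCI of the existing
 * VLAN header is rewritten; otherwise the original ethertype is moved
 * into a freshly built VLAN header and replaced by 'vlan_ethtype'.
 * '*is_new' reports whether a new VLAN header was inserted.
 */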
void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
    uint16_t vlan_ethtype, bool *is_new)
{
    struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);

    switch (be16_to_cpu(ehdr->h_proto)) {
    case ETH_P_VLAN:
    case ETH_P_DVLAN:
        /* vlan hdr exists */
        *is_new = false;
        break;

    default:
        /* No VLAN header, put a new one */
        vhdr->h_proto = ehdr->h_proto;
        ehdr->h_proto = cpu_to_be16(vlan_ethtype);
        *is_new = true;
        break;
    }
    vhdr->h_tci = cpu_to_be16(vlan_tag);
}

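/*
 * Map an L3/L4 protocol pair onto a VIRTIO_NET_HDR_GSO_* type, OR-ing in
 * the ECN bit when the IP header signals congestion.  Unhandled
 * combinations fall through to VIRTIO_NET_HDR_GSO_NONE and are logged as
 * unimplemented.
 */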
uint8_t
eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
{
    uint8_t ecn_state = 0;

    if (l3_proto == ETH_P_IP) {
        struct ip_header *iphdr = (struct ip_header *) l3_hdr;

        if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
            if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) {
                ecn_state = VIRTIO_NET_HDR_GSO_ECN;
            }
            if (l4proto == IP_PROTO_TCP) {
                return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
            } else if (l4proto == IP_PROTO_UDP) {
                return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
            }
        }
    } else if (l3_proto == ETH_P_IPV6) {
        struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;

        if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) {
            ecn_state = VIRTIO_NET_HDR_GSO_ECN;
        }

        if (l4proto == IP_PROTO_TCP) {
            return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
        }
    }
    qemu_log_mask(LOG_UNIMP, "%s: probably not GSO frame, "
        "unknown L3 protocol: 0x%04"PRIx16"\n", __func__, l3_proto);

    return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
}

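/*
 * Return the ethertype found at the end of an L2 header of 'l2hdr_len'
 * bytes, or ETH_P_UNKNOWN when the iovec is too short to contain it.
 */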
uint16_t
eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
{
    uint16_t proto;
    size_t copied;
    size_t size = iov_size(l2hdr_iov, iovcnt);
    size_t proto_offset = l2hdr_len - sizeof(proto);

    if (size < proto_offset) {
        return ETH_P_UNKNOWN;
    }

    copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
                        &proto, sizeof(proto));

    return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
}

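/*
 * Copy 'length' bytes at 'offset' from the iovec into 'buffer', returning
 * false if the packet is too short to provide the full chunk.
 */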
static bool
_eth_copy_chunk(size_t input_size,
                const struct iovec *iov, int iovcnt,
                size_t offset, size_t length,
                void *buffer)
{
    size_t copied;

    if (input_size < offset) {
        return false;
    }

    copied = iov_to_buf(iov, iovcnt, offset, buffer, length);

    if (copied < length) {
        return false;
    }

    return true;
}

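/*
 * Check whether a TCP segment carries a payload, i.e. whether the L4 length
 * derived from the IPv4 total length (or the IPv6 payload length minus
 * extension headers) exceeds the TCP data offset.
 */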
static bool
_eth_tcp_has_data(bool is_ip4,
                  const struct ip_header  *ip4_hdr,
                  const struct ip6_header *ip6_hdr,
                  size_t full_ip6hdr_len,
                  const struct tcp_header *tcp)
{
    uint32_t l4len;

    if (is_ip4) {
        l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
    } else {
        size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
        l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
    }

    return l4len > TCP_HEADER_DATA_OFFSET(tcp);
}

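/*
 * Parse the L2/L3/L4 headers of a frame held in 'iov'.  On return the
 * *isip4/*isip6/*isudp/*istcp flags describe the detected protocols, the
 * *l3hdr_off/*l4hdr_off/*l5hdr_off values give the header offsets within
 * the frame, and the ip4/ip6/l4 info structures receive copies of the
 * respective headers.  L4 headers are only extracted for unfragmented
 * packets.
 */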
void eth_get_protocols(const struct iovec *iov, int iovcnt,
                       bool *isip4, bool *isip6,
                       bool *isudp, bool *istcp,
                       size_t *l3hdr_off,
                       size_t *l4hdr_off,
                       size_t *l5hdr_off,
                       eth_ip6_hdr_info *ip6hdr_info,
                       eth_ip4_hdr_info *ip4hdr_info,
                       eth_l4_hdr_info  *l4hdr_info)
{
    int proto;
    bool fragment = false;
    size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt);
    size_t input_size = iov_size(iov, iovcnt);
    size_t copied;

    *isip4 = *isip6 = *isudp = *istcp = false;

    proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len);

    *l3hdr_off = l2hdr_len;

    if (proto == ETH_P_IP) {
        struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;

        if (input_size < l2hdr_len) {
            return;
        }

        copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr));

        *isip4 = true;

        if (copied < sizeof(*iphdr)) {
            return;
        }

        if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
            if (iphdr->ip_p == IP_PROTO_TCP) {
                *istcp = true;
            } else if (iphdr->ip_p == IP_PROTO_UDP) {
                *isudp = true;
            }
        }

        ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
        *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr);

        fragment = ip4hdr_info->fragment;
    } else if (proto == ETH_P_IPV6) {

        *isip6 = true;
        if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len,
                               ip6hdr_info)) {
            if (ip6hdr_info->l4proto == IP_PROTO_TCP) {
                *istcp = true;
            } else if (ip6hdr_info->l4proto == IP_PROTO_UDP) {
                *isudp = true;
            }
        } else {
            return;
        }

        *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len;
        fragment = ip6hdr_info->fragment;
    }

    if (!fragment) {
        if (*istcp) {
            *istcp = _eth_copy_chunk(input_size,
                                     iov, iovcnt,
                                     *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
                                     &l4hdr_info->hdr.tcp);

            if (*istcp) {
                *l5hdr_off = *l4hdr_off +
                    TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);

                l4hdr_info->has_tcp_data =
                    _eth_tcp_has_data(proto == ETH_P_IP,
                                      &ip4hdr_info->ip4_hdr,
                                      &ip6hdr_info->ip6_hdr,
                                      *l4hdr_off - *l3hdr_off,
                                      &l4hdr_info->hdr.tcp);
            }
        } else if (*isudp) {
            *isudp = _eth_copy_chunk(input_size,
                                     iov, iovcnt,
                                     *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
                                     &l4hdr_info->hdr.udp);
            *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
        }
    }
}

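/*
 * Strip the outer VLAN tag from a frame.  The untagged Ethernet header is
 * rebuilt in 'new_ehdr_buf', '*tci' receives the outer tag control
 * information and '*payload_offset' the offset of the data following the
 * stripped header(s).  Returns the number of header bytes written to
 * 'new_ehdr_buf' (the Ethernet header, plus the inner VLAN header of a
 * double-tagged frame), or 0 when the frame is untagged or too short.
 */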
size_t
eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
               uint8_t *new_ehdr_buf,
               uint16_t *payload_offset, uint16_t *tci)
{
    struct vlan_header vlan_hdr;
    struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;

    size_t copied = iov_to_buf(iov, iovcnt, iovoff,
                               new_ehdr, sizeof(*new_ehdr));

    if (copied < sizeof(*new_ehdr)) {
        return 0;
    }

    switch (be16_to_cpu(new_ehdr->h_proto)) {
    case ETH_P_VLAN:
    case ETH_P_DVLAN:
        copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
                            &vlan_hdr, sizeof(vlan_hdr));

        if (copied < sizeof(vlan_hdr)) {
            return 0;
        }

        new_ehdr->h_proto = vlan_hdr.h_proto;

        *tci = be16_to_cpu(vlan_hdr.h_tci);
        *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);

        if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {

            copied = iov_to_buf(iov, iovcnt, *payload_offset,
                                PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));

            if (copied < sizeof(vlan_hdr)) {
                return 0;
            }

            *payload_offset += sizeof(vlan_hdr);

            return sizeof(struct eth_header) + sizeof(struct vlan_header);
        } else {
            return sizeof(struct eth_header);
        }
    default:
        return 0;
    }
}

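/*
 * Like eth_strip_vlan(), but strips a single tag whose ethertype matches
 * the caller-supplied 'vet' value instead of the standard VLAN/DVLAN types.
 */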
size_t
eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
                  uint16_t vet, uint8_t *new_ehdr_buf,
                  uint16_t *payload_offset, uint16_t *tci)
{
    struct vlan_header vlan_hdr;
    struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;

    size_t copied = iov_to_buf(iov, iovcnt, iovoff,
                               new_ehdr, sizeof(*new_ehdr));

    if (copied < sizeof(*new_ehdr)) {
        return 0;
    }

    if (be16_to_cpu(new_ehdr->h_proto) == vet) {
        copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
                            &vlan_hdr, sizeof(vlan_hdr));

        if (copied < sizeof(vlan_hdr)) {
            return 0;
        }

        new_ehdr->h_proto = vlan_hdr.h_proto;

        *tci = be16_to_cpu(vlan_hdr.h_tci);
        *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
        return sizeof(struct eth_header);
    }

    return 0;
}

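/*
 * Rewrite the fragmentation fields of an IPv4 header for a fragment whose
 * data starts 'frag_offset' bytes into the original payload: the fragment
 * offset field and the MF flag are updated from 'frag_offset' and
 * 'more_frags', and the total length is set to 'l3hdr_len' plus
 * 'l3payload_len'.  Non-IPv4 frames are left untouched.
 */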
void
eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
                            void *l3hdr, size_t l3hdr_len,
                            size_t l3payload_len,
                            size_t frag_offset, bool more_frags)
{
    const struct iovec l2vec = {
        .iov_base = (void *) l2hdr,
        .iov_len = l2hdr_len
    };

    if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) {
        uint16_t orig_flags;
        struct ip_header *iphdr = (struct ip_header *) l3hdr;
        uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE;
        uint16_t new_ip_off;

        assert(frag_offset % IP_FRAG_UNIT_SIZE == 0);
        assert((frag_off_units & ~IP_OFFMASK) == 0);

        orig_flags = be16_to_cpu(iphdr->ip_off) & ~(IP_OFFMASK|IP_MF);
        new_ip_off = frag_off_units | orig_flags | (more_frags ? IP_MF : 0);
        iphdr->ip_off = cpu_to_be16(new_ip_off);
        iphdr->ip_len = cpu_to_be16(l3payload_len + l3hdr_len);
    }
}

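/* Recompute the IPv4 header checksum in place. */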
void
eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
{
    struct ip_header *iphdr = (struct ip_header *) l3hdr;
    iphdr->ip_sum = 0;
    iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len));
}

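/*
 * Start a checksum over the IPv4 pseudo header (source/destination address,
 * protocol and L4 payload length 'csl').  '*cso' returns the number of
 * bytes summed so the caller can continue the checksum over the L4 data.
 */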
uint32_t
eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
                             uint16_t csl,
                             uint32_t *cso)
{
    struct ip_pseudo_header ipph;
    ipph.ip_src = iphdr->ip_src;
    ipph.ip_dst = iphdr->ip_dst;
    ipph.ip_payload = cpu_to_be16(csl);
    ipph.ip_proto = iphdr->ip_p;
    ipph.zeros = 0;
    *cso = sizeof(ipph);
    return net_checksum_add(*cso, (uint8_t *) &ipph);
}

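/*
 * Same as eth_calc_ip4_pseudo_hdr_csum(), but for the IPv6 pseudo header;
 * the next-header value must be supplied explicitly in 'l4_proto'.
 */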
uint32_t
eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
                             uint16_t csl,
                             uint8_t l4_proto,
                             uint32_t *cso)
{
    struct ip6_pseudo_header ipph;
    ipph.ip6_src = iphdr->ip6_src;
    ipph.ip6_dst = iphdr->ip6_dst;
    ipph.len = cpu_to_be16(csl);
    ipph.zero[0] = 0;
    ipph.zero[1] = 0;
    ipph.zero[2] = 0;
    ipph.next_hdr = l4_proto;
    *cso = sizeof(ipph);
    return net_checksum_add(*cso, (uint8_t *)&ipph);
}

static bool
eth_is_ip6_extension_header_type(uint8_t hdr_type)
{
    switch (hdr_type) {
    case IP6_HOP_BY_HOP:
    case IP6_ROUTING:
    case IP6_FRAGMENT:
    case IP6_ESP:
    case IP6_AUTHENTICATION:
    case IP6_DESTINATON:
    case IP6_MOBILITY:
        return true;
    default:
        return false;
    }
}

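/*
 * Extract the final destination address from a type 2 (Mobile IPv6)
 * routing extension header for RSS hashing.  Returns false when the header
 * is not a type 2 / segments-left 1 routing header or the packet is too
 * short.
 */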
static bool
_eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
                        size_t ext_hdr_offset,
                        struct ip6_ext_hdr *ext_hdr,
                        struct in6_address *dst_addr)
{
    struct ip6_ext_hdr_routing rt_hdr;
    size_t input_size = iov_size(pkt, pkt_frags);
    size_t bytes_read;

    if (input_size < ext_hdr_offset + sizeof(rt_hdr) + sizeof(*dst_addr)) {
        return false;
    }

    bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset,
                            &rt_hdr, sizeof(rt_hdr));
    assert(bytes_read == sizeof(rt_hdr));
    if ((rt_hdr.rtype != 2) || (rt_hdr.segleft != 1)) {
        return false;
    }
    bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset + sizeof(rt_hdr),
                            dst_addr, sizeof(*dst_addr));
    assert(bytes_read == sizeof(*dst_addr));

    return true;
}

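/*
 * Walk the options of a destination options extension header looking for a
 * Home Address option and, if found, copy the home address into 'src_addr'
 * for RSS hashing.  Returns false if the option is absent or the packet is
 * truncated.
 */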
static bool
_eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
                        size_t dsthdr_offset,
                        struct ip6_ext_hdr *ext_hdr,
                        struct in6_address *src_addr)
{
    size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
    struct ip6_option_hdr opthdr;
    size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);

    while (bytes_left > sizeof(opthdr)) {
        size_t input_size = iov_size(pkt, pkt_frags);
        size_t bytes_read, optlen;

        if (input_size < opt_offset) {
            return false;
        }

        bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
                                &opthdr, sizeof(opthdr));

        if (bytes_read != sizeof(opthdr)) {
            return false;
        }

        optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
                                               : (opthdr.len + sizeof(opthdr));

        if (optlen > bytes_left) {
            return false;
        }

        if (opthdr.type == IP6_OPT_HOME) {
            size_t input_size = iov_size(pkt, pkt_frags);

            if (input_size < opt_offset + sizeof(opthdr)) {
                return false;
            }

            bytes_read = iov_to_buf(pkt, pkt_frags,
                                    opt_offset + sizeof(opthdr),
                                    src_addr, sizeof(*src_addr));

            return bytes_read == sizeof(*src_addr);
        }

        opt_offset += optlen;
        bytes_left -= optlen;
    }

    return false;
}

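/*
 * Parse an IPv6 header starting at 'ip6hdr_off' and walk its extension
 * header chain.  Fills 'info' with the base header, the total header
 * length, the final L4 protocol, whether the packet is a fragment, and any
 * RSS source/destination addresses found in routing or destination options
 * headers.  Returns false if the packet is too short to parse.
 */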
bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
                        size_t ip6hdr_off, eth_ip6_hdr_info *info)
{
    struct ip6_ext_hdr ext_hdr;
    size_t bytes_read;
    uint8_t curr_ext_hdr_type;
    size_t input_size = iov_size(pkt, pkt_frags);

    info->rss_ex_dst_valid = false;
    info->rss_ex_src_valid = false;
    info->fragment = false;

    if (input_size < ip6hdr_off) {
        return false;
    }

    bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
                            &info->ip6_hdr, sizeof(info->ip6_hdr));
    if (bytes_read < sizeof(info->ip6_hdr)) {
        return false;
    }

    info->full_hdr_len = sizeof(struct ip6_header);

    curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;

    if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
        info->l4proto = info->ip6_hdr.ip6_nxt;
        info->has_ext_hdrs = false;
        return true;
    }

    info->has_ext_hdrs = true;

    do {
        if (input_size < ip6hdr_off + info->full_hdr_len) {
            return false;
        }

        bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
                                &ext_hdr, sizeof(ext_hdr));

        if (bytes_read < sizeof(ext_hdr)) {
            return false;
        }

        if (curr_ext_hdr_type == IP6_ROUTING) {
            if (ext_hdr.ip6r_len == sizeof(struct in6_address) / 8) {
                info->rss_ex_dst_valid =
                    _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
                                             ip6hdr_off + info->full_hdr_len,
                                             &ext_hdr, &info->rss_ex_dst);
            }
        } else if (curr_ext_hdr_type == IP6_DESTINATON) {
            info->rss_ex_src_valid =
                _eth_get_rss_ex_src_addr(pkt, pkt_frags,
                                         ip6hdr_off + info->full_hdr_len,
                                         &ext_hdr, &info->rss_ex_src);
        } else if (curr_ext_hdr_type == IP6_FRAGMENT) {
            info->fragment = true;
        }

        info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
        curr_ext_hdr_type = ext_hdr.ip6r_nxt;
    } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));

    info->l4proto = ext_hdr.ip6r_nxt;
    return true;
}

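/*
 * Pad a frame shorter than the minimum Ethernet length (ETH_ZLEN) with
 * zeros.  The padded copy is written to 'padded_pkt' and '*padded_buflen'
 * is updated; returns false when no padding was needed.
 */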
bool eth_pad_short_frame(uint8_t *padded_pkt, size_t *padded_buflen,
                         const void *pkt, size_t pkt_size)
{
    assert(padded_buflen && *padded_buflen >= ETH_ZLEN);

    if (pkt_size >= ETH_ZLEN) {
        return false;
    }

    /* pad to minimum Ethernet frame length */
    memcpy(padded_pkt, pkt, pkt_size);
    memset(&padded_pkt[pkt_size], 0, ETH_ZLEN - pkt_size);
    *padded_buflen = ETH_ZLEN;

    return true;
}