qemu/net/eth.c
<<
>>
Prefs
   1/*
   2 * QEMU network structures definitions and helper functions
   3 *
   4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
   5 *
   6 * Developed by Daynix Computing LTD (http://www.daynix.com)
   7 *
   8 * Authors:
   9 * Dmitry Fleytman <dmitry@daynix.com>
  10 * Tamir Shomer <tamirs@daynix.com>
  11 * Yan Vugenfirer <yan@daynix.com>
  12 *
  13 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  14 * See the COPYING file in the top-level directory.
  15 *
  16 */
  17
  18#include "qemu/osdep.h"
  19#include "net/eth.h"
  20#include "net/checksum.h"
  21#include "net/tap.h"
  22
  23void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
  24    uint16_t vlan_ethtype, bool *is_new)
  25{
  26    struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
  27
  28    switch (be16_to_cpu(ehdr->h_proto)) {
  29    case ETH_P_VLAN:
  30    case ETH_P_DVLAN:
  31        /* vlan hdr exists */
  32        *is_new = false;
  33        break;
  34
  35    default:
  36        /* No VLAN header, put a new one */
  37        vhdr->h_proto = ehdr->h_proto;
  38        ehdr->h_proto = cpu_to_be16(vlan_ethtype);
  39        *is_new = true;
  40        break;
  41    }
  42    vhdr->h_tci = cpu_to_be16(vlan_tag);
  43}
  44
  45uint8_t
  46eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
  47{
  48    uint8_t ecn_state = 0;
  49
  50    if (l3_proto == ETH_P_IP) {
  51        struct ip_header *iphdr = (struct ip_header *) l3_hdr;
  52
  53        if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
  54            if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) {
  55                ecn_state = VIRTIO_NET_HDR_GSO_ECN;
  56            }
  57            if (l4proto == IP_PROTO_TCP) {
  58                return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
  59            } else if (l4proto == IP_PROTO_UDP) {
  60                return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
  61            }
  62        }
  63    } else if (l3_proto == ETH_P_IPV6) {
  64        struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;
  65
  66        if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) {
  67            ecn_state = VIRTIO_NET_HDR_GSO_ECN;
  68        }
  69
  70        if (l4proto == IP_PROTO_TCP) {
  71            return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
  72        }
  73    }
  74
  75    /* Unsupported offload */
  76    g_assert_not_reached();
  77
  78    return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
  79}
  80
  81uint16_t
  82eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
  83{
  84    uint16_t proto;
  85    size_t copied;
  86    size_t size = iov_size(l2hdr_iov, iovcnt);
  87    size_t proto_offset = l2hdr_len - sizeof(proto);
  88
  89    if (size < proto_offset) {
  90        return ETH_P_UNKNOWN;
  91    }
  92
  93    copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
  94                        &proto, sizeof(proto));
  95
  96    return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
  97}
  98
  99static bool
 100_eth_copy_chunk(size_t input_size,
 101                const struct iovec *iov, int iovcnt,
 102                size_t offset, size_t length,
 103                void *buffer)
 104{
 105    size_t copied;
 106
 107    if (input_size < offset) {
 108        return false;
 109    }
 110
 111    copied = iov_to_buf(iov, iovcnt, offset, buffer, length);
 112
 113    if (copied < length) {
 114        return false;
 115    }
 116
 117    return true;
 118}
 119
 120static bool
 121_eth_tcp_has_data(bool is_ip4,
 122                  const struct ip_header  *ip4_hdr,
 123                  const struct ip6_header *ip6_hdr,
 124                  size_t full_ip6hdr_len,
 125                  const struct tcp_header *tcp)
 126{
 127    uint32_t l4len;
 128
 129    if (is_ip4) {
 130        l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
 131    } else {
 132        size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
 133        l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
 134    }
 135
 136    return l4len > TCP_HEADER_DATA_OFFSET(tcp);
 137}
 138
 139void eth_get_protocols(const struct iovec *iov, int iovcnt,
 140                       bool *isip4, bool *isip6,
 141                       bool *isudp, bool *istcp,
 142                       size_t *l3hdr_off,
 143                       size_t *l4hdr_off,
 144                       size_t *l5hdr_off,
 145                       eth_ip6_hdr_info *ip6hdr_info,
 146                       eth_ip4_hdr_info *ip4hdr_info,
 147                       eth_l4_hdr_info  *l4hdr_info)
 148{
 149    int proto;
 150    bool fragment = false;
 151    size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt);
 152    size_t input_size = iov_size(iov, iovcnt);
 153    size_t copied;
 154
 155    *isip4 = *isip6 = *isudp = *istcp = false;
 156
 157    proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len);
 158
 159    *l3hdr_off = l2hdr_len;
 160
 161    if (proto == ETH_P_IP) {
 162        struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;
 163
 164        if (input_size < l2hdr_len) {
 165            return;
 166        }
 167
 168        copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr));
 169
 170        *isip4 = true;
 171
 172        if (copied < sizeof(*iphdr)) {
 173            return;
 174        }
 175
 176        if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
 177            if (iphdr->ip_p == IP_PROTO_TCP) {
 178                *istcp = true;
 179            } else if (iphdr->ip_p == IP_PROTO_UDP) {
 180                *isudp = true;
 181            }
 182        }
 183
 184        ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
 185        *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr);
 186
 187        fragment = ip4hdr_info->fragment;
 188    } else if (proto == ETH_P_IPV6) {
 189
 190        *isip6 = true;
 191        if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len,
 192                               ip6hdr_info)) {
 193            if (ip6hdr_info->l4proto == IP_PROTO_TCP) {
 194                *istcp = true;
 195            } else if (ip6hdr_info->l4proto == IP_PROTO_UDP) {
 196                *isudp = true;
 197            }
 198        } else {
 199            return;
 200        }
 201
 202        *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len;
 203        fragment = ip6hdr_info->fragment;
 204    }
 205
 206    if (!fragment) {
 207        if (*istcp) {
 208            *istcp = _eth_copy_chunk(input_size,
 209                                     iov, iovcnt,
 210                                     *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
 211                                     &l4hdr_info->hdr.tcp);
 212
 213            if (*istcp) {
 214                *l5hdr_off = *l4hdr_off +
 215                    TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);
 216
 217                l4hdr_info->has_tcp_data =
 218                    _eth_tcp_has_data(proto == ETH_P_IP,
 219                                      &ip4hdr_info->ip4_hdr,
 220                                      &ip6hdr_info->ip6_hdr,
 221                                      *l4hdr_off - *l3hdr_off,
 222                                      &l4hdr_info->hdr.tcp);
 223            }
 224        } else if (*isudp) {
 225            *isudp = _eth_copy_chunk(input_size,
 226                                     iov, iovcnt,
 227                                     *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
 228                                     &l4hdr_info->hdr.udp);
 229            *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
 230        }
 231    }
 232}
 233
 234size_t
 235eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
 236               uint8_t *new_ehdr_buf,
 237               uint16_t *payload_offset, uint16_t *tci)
 238{
 239    struct vlan_header vlan_hdr;
 240    struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
 241
 242    size_t copied = iov_to_buf(iov, iovcnt, iovoff,
 243                               new_ehdr, sizeof(*new_ehdr));
 244
 245    if (copied < sizeof(*new_ehdr)) {
 246        return 0;
 247    }
 248
 249    switch (be16_to_cpu(new_ehdr->h_proto)) {
 250    case ETH_P_VLAN:
 251    case ETH_P_DVLAN:
 252        copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
 253                            &vlan_hdr, sizeof(vlan_hdr));
 254
 255        if (copied < sizeof(vlan_hdr)) {
 256            return 0;
 257        }
 258
 259        new_ehdr->h_proto = vlan_hdr.h_proto;
 260
 261        *tci = be16_to_cpu(vlan_hdr.h_tci);
 262        *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
 263
 264        if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {
 265
 266            copied = iov_to_buf(iov, iovcnt, *payload_offset,
 267                                PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));
 268
 269            if (copied < sizeof(vlan_hdr)) {
 270                return 0;
 271            }
 272
 273            *payload_offset += sizeof(vlan_hdr);
 274
 275            return sizeof(struct eth_header) + sizeof(struct vlan_header);
 276        } else {
 277            return sizeof(struct eth_header);
 278        }
 279    default:
 280        return 0;
 281    }
 282}
 283
 284size_t
 285eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
 286                  uint16_t vet, uint8_t *new_ehdr_buf,
 287                  uint16_t *payload_offset, uint16_t *tci)
 288{
 289    struct vlan_header vlan_hdr;
 290    struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
 291
 292    size_t copied = iov_to_buf(iov, iovcnt, iovoff,
 293                               new_ehdr, sizeof(*new_ehdr));
 294
 295    if (copied < sizeof(*new_ehdr)) {
 296        return 0;
 297    }
 298
 299    if (be16_to_cpu(new_ehdr->h_proto) == vet) {
 300        copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
 301                            &vlan_hdr, sizeof(vlan_hdr));
 302
 303        if (copied < sizeof(vlan_hdr)) {
 304            return 0;
 305        }
 306
 307        new_ehdr->h_proto = vlan_hdr.h_proto;
 308
 309        *tci = be16_to_cpu(vlan_hdr.h_tci);
 310        *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
 311        return sizeof(struct eth_header);
 312    }
 313
 314    return 0;
 315}
 316
 317void
 318eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
 319                            void *l3hdr, size_t l3hdr_len,
 320                            size_t l3payload_len,
 321                            size_t frag_offset, bool more_frags)
 322{
 323    const struct iovec l2vec = {
 324        .iov_base = (void *) l2hdr,
 325        .iov_len = l2hdr_len
 326    };
 327
 328    if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) {
 329        uint16_t orig_flags;
 330        struct ip_header *iphdr = (struct ip_header *) l3hdr;
 331        uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE;
 332        uint16_t new_ip_off;
 333
 334        assert(frag_offset % IP_FRAG_UNIT_SIZE == 0);
 335        assert((frag_off_units & ~IP_OFFMASK) == 0);
 336
 337        orig_flags = be16_to_cpu(iphdr->ip_off) & ~(IP_OFFMASK|IP_MF);
 338        new_ip_off = frag_off_units | orig_flags  | (more_frags ? IP_MF : 0);
 339        iphdr->ip_off = cpu_to_be16(new_ip_off);
 340        iphdr->ip_len = cpu_to_be16(l3payload_len + l3hdr_len);
 341    }
 342}
 343
 344void
 345eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
 346{
 347    struct ip_header *iphdr = (struct ip_header *) l3hdr;
 348    iphdr->ip_sum = 0;
 349    iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len));
 350}
 351
 352uint32_t
 353eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
 354                             uint16_t csl,
 355                             uint32_t *cso)
 356{
 357    struct ip_pseudo_header ipph;
 358    ipph.ip_src = iphdr->ip_src;
 359    ipph.ip_dst = iphdr->ip_dst;
 360    ipph.ip_payload = cpu_to_be16(csl);
 361    ipph.ip_proto = iphdr->ip_p;
 362    ipph.zeros = 0;
 363    *cso = sizeof(ipph);
 364    return net_checksum_add(*cso, (uint8_t *) &ipph);
 365}
 366
 367uint32_t
 368eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
 369                             uint16_t csl,
 370                             uint8_t l4_proto,
 371                             uint32_t *cso)
 372{
 373    struct ip6_pseudo_header ipph;
 374    ipph.ip6_src = iphdr->ip6_src;
 375    ipph.ip6_dst = iphdr->ip6_dst;
 376    ipph.len = cpu_to_be16(csl);
 377    ipph.zero[0] = 0;
 378    ipph.zero[1] = 0;
 379    ipph.zero[2] = 0;
 380    ipph.next_hdr = l4_proto;
 381    *cso = sizeof(ipph);
 382    return net_checksum_add(*cso, (uint8_t *)&ipph);
 383}
 384
 385static bool
 386eth_is_ip6_extension_header_type(uint8_t hdr_type)
 387{
 388    switch (hdr_type) {
 389    case IP6_HOP_BY_HOP:
 390    case IP6_ROUTING:
 391    case IP6_FRAGMENT:
 392    case IP6_ESP:
 393    case IP6_AUTHENTICATION:
 394    case IP6_DESTINATON:
 395    case IP6_MOBILITY:
 396        return true;
 397    default:
 398        return false;
 399    }
 400}
 401
 402static bool
 403_eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
 404                        size_t rthdr_offset,
 405                        struct ip6_ext_hdr *ext_hdr,
 406                        struct in6_address *dst_addr)
 407{
 408    struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr;
 409
 410    if ((rthdr->rtype == 2) &&
 411        (rthdr->len == sizeof(struct in6_address) / 8) &&
 412        (rthdr->segleft == 1)) {
 413
 414        size_t input_size = iov_size(pkt, pkt_frags);
 415        size_t bytes_read;
 416
 417        if (input_size < rthdr_offset + sizeof(*ext_hdr)) {
 418            return false;
 419        }
 420
 421        bytes_read = iov_to_buf(pkt, pkt_frags,
 422                                rthdr_offset + sizeof(*ext_hdr),
 423                                dst_addr, sizeof(*dst_addr));
 424
 425        return bytes_read == sizeof(*dst_addr);
 426    }
 427
 428    return false;
 429}
 430
 431static bool
 432_eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
 433                        size_t dsthdr_offset,
 434                        struct ip6_ext_hdr *ext_hdr,
 435                        struct in6_address *src_addr)
 436{
 437    size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
 438    struct ip6_option_hdr opthdr;
 439    size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);
 440
 441    while (bytes_left > sizeof(opthdr)) {
 442        size_t input_size = iov_size(pkt, pkt_frags);
 443        size_t bytes_read, optlen;
 444
 445        if (input_size < opt_offset) {
 446            return false;
 447        }
 448
 449        bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
 450                                &opthdr, sizeof(opthdr));
 451
 452        if (bytes_read != sizeof(opthdr)) {
 453            return false;
 454        }
 455
 456        optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
 457                                               : (opthdr.len + sizeof(opthdr));
 458
 459        if (optlen > bytes_left) {
 460            return false;
 461        }
 462
 463        if (opthdr.type == IP6_OPT_HOME) {
 464            size_t input_size = iov_size(pkt, pkt_frags);
 465
 466            if (input_size < opt_offset + sizeof(opthdr)) {
 467                return false;
 468            }
 469
 470            bytes_read = iov_to_buf(pkt, pkt_frags,
 471                                    opt_offset + sizeof(opthdr),
 472                                    src_addr, sizeof(*src_addr));
 473
 474            return bytes_read == sizeof(*src_addr);
 475        }
 476
 477        opt_offset += optlen;
 478        bytes_left -= optlen;
 479    }
 480
 481    return false;
 482}
 483
 484bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
 485                        size_t ip6hdr_off, eth_ip6_hdr_info *info)
 486{
 487    struct ip6_ext_hdr ext_hdr;
 488    size_t bytes_read;
 489    uint8_t curr_ext_hdr_type;
 490    size_t input_size = iov_size(pkt, pkt_frags);
 491
 492    info->rss_ex_dst_valid = false;
 493    info->rss_ex_src_valid = false;
 494    info->fragment = false;
 495
 496    if (input_size < ip6hdr_off) {
 497        return false;
 498    }
 499
 500    bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
 501                            &info->ip6_hdr, sizeof(info->ip6_hdr));
 502    if (bytes_read < sizeof(info->ip6_hdr)) {
 503        return false;
 504    }
 505
 506    info->full_hdr_len = sizeof(struct ip6_header);
 507
 508    curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;
 509
 510    if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
 511        info->l4proto = info->ip6_hdr.ip6_nxt;
 512        info->has_ext_hdrs = false;
 513        return true;
 514    }
 515
 516    info->has_ext_hdrs = true;
 517
 518    do {
 519        if (input_size < ip6hdr_off + info->full_hdr_len) {
 520            return false;
 521        }
 522
 523        bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
 524                                &ext_hdr, sizeof(ext_hdr));
 525
 526        if (bytes_read < sizeof(ext_hdr)) {
 527            return false;
 528        }
 529
 530        if (curr_ext_hdr_type == IP6_ROUTING) {
 531            info->rss_ex_dst_valid =
 532                _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
 533                                         ip6hdr_off + info->full_hdr_len,
 534                                         &ext_hdr, &info->rss_ex_dst);
 535        } else if (curr_ext_hdr_type == IP6_DESTINATON) {
 536            info->rss_ex_src_valid =
 537                _eth_get_rss_ex_src_addr(pkt, pkt_frags,
 538                                         ip6hdr_off + info->full_hdr_len,
 539                                         &ext_hdr, &info->rss_ex_src);
 540        } else if (curr_ext_hdr_type == IP6_FRAGMENT) {
 541            info->fragment = true;
 542        }
 543
 544        info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
 545        curr_ext_hdr_type = ext_hdr.ip6r_nxt;
 546    } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));
 547
 548    info->l4proto = ext_hdr.ip6r_nxt;
 549    return true;
 550}
 551