qemu/hw/net/net_rx_pkt.c
<<
>>
Prefs
   1/*
   2 * QEMU RX packets abstractions
   3 *
   4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
   5 *
   6 * Developed by Daynix Computing LTD (http://www.daynix.com)
   7 *
   8 * Authors:
   9 * Dmitry Fleytman <dmitry@daynix.com>
  10 * Tamir Shomer <tamirs@daynix.com>
  11 * Yan Vugenfirer <yan@daynix.com>
  12 *
  13 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  14 * See the COPYING file in the top-level directory.
  15 *
  16 */
  17
  18#include "qemu/osdep.h"
  19#include "trace.h"
  20#include "net_rx_pkt.h"
  21#include "net/checksum.h"
  22#include "net/tap.h"
  23
  24struct NetRxPkt {
  25    struct virtio_net_hdr virt_hdr;
  26    uint8_t ehdr_buf[sizeof(struct eth_header) + sizeof(struct vlan_header)];
  27    struct iovec *vec;
  28    uint16_t vec_len_total;
  29    uint16_t vec_len;
  30    uint32_t tot_len;
  31    uint16_t tci;
  32    size_t ehdr_buf_len;
  33    bool has_virt_hdr;
  34    eth_pkt_types_e packet_type;
  35
  36    /* Analysis results */
  37    bool isip4;
  38    bool isip6;
  39    bool isudp;
  40    bool istcp;
  41
  42    size_t l3hdr_off;
  43    size_t l4hdr_off;
  44    size_t l5hdr_off;
  45
  46    eth_ip6_hdr_info ip6hdr_info;
  47    eth_ip4_hdr_info ip4hdr_info;
  48    eth_l4_hdr_info  l4hdr_info;
  49};
  50
  51void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr)
  52{
  53    struct NetRxPkt *p = g_malloc0(sizeof *p);
  54    p->has_virt_hdr = has_virt_hdr;
  55    p->vec = NULL;
  56    p->vec_len_total = 0;
  57    *pkt = p;
  58}
  59
  60void net_rx_pkt_uninit(struct NetRxPkt *pkt)
  61{
  62    if (pkt->vec_len_total != 0) {
  63        g_free(pkt->vec);
  64    }
  65
  66    g_free(pkt);
  67}
  68
  69struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt)
  70{
  71    assert(pkt);
  72    return &pkt->virt_hdr;
  73}
  74
  75static inline void
  76net_rx_pkt_iovec_realloc(struct NetRxPkt *pkt,
  77                            int new_iov_len)
  78{
  79    if (pkt->vec_len_total < new_iov_len) {
  80        g_free(pkt->vec);
  81        pkt->vec = g_malloc(sizeof(*pkt->vec) * new_iov_len);
  82        pkt->vec_len_total = new_iov_len;
  83    }
  84}
  85
  86static void
  87net_rx_pkt_pull_data(struct NetRxPkt *pkt,
  88                        const struct iovec *iov, int iovcnt,
  89                        size_t ploff)
  90{
  91    uint32_t pllen = iov_size(iov, iovcnt) - ploff;
  92
  93    if (pkt->ehdr_buf_len) {
  94        net_rx_pkt_iovec_realloc(pkt, iovcnt + 1);
  95
  96        pkt->vec[0].iov_base = pkt->ehdr_buf;
  97        pkt->vec[0].iov_len = pkt->ehdr_buf_len;
  98
  99        pkt->tot_len = pllen + pkt->ehdr_buf_len;
 100        pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1,
 101                                iov, iovcnt, ploff, pllen) + 1;
 102    } else {
 103        net_rx_pkt_iovec_realloc(pkt, iovcnt);
 104
 105        pkt->tot_len = pllen;
 106        pkt->vec_len = iov_copy(pkt->vec, pkt->vec_len_total,
 107                                iov, iovcnt, ploff, pkt->tot_len);
 108    }
 109
 110    eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->isip4, &pkt->isip6,
 111                      &pkt->isudp, &pkt->istcp,
 112                      &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off,
 113                      &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info);
 114
 115    trace_net_rx_pkt_parsed(pkt->isip4, pkt->isip6, pkt->isudp, pkt->istcp,
 116                            pkt->l3hdr_off, pkt->l4hdr_off, pkt->l5hdr_off);
 117}
 118
 119void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt,
 120                                const struct iovec *iov, int iovcnt,
 121                                size_t iovoff, bool strip_vlan)
 122{
 123    uint16_t tci = 0;
 124    uint16_t ploff = iovoff;
 125    assert(pkt);
 126
 127    if (strip_vlan) {
 128        pkt->ehdr_buf_len = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf,
 129                                           &ploff, &tci);
 130    } else {
 131        pkt->ehdr_buf_len = 0;
 132    }
 133
 134    pkt->tci = tci;
 135
 136    net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff);
 137}
 138
 139void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt,
 140                                const struct iovec *iov, int iovcnt,
 141                                size_t iovoff, bool strip_vlan,
 142                                uint16_t vet)
 143{
 144    uint16_t tci = 0;
 145    uint16_t ploff = iovoff;
 146    assert(pkt);
 147
 148    if (strip_vlan) {
 149        pkt->ehdr_buf_len = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet,
 150                                              pkt->ehdr_buf,
 151                                              &ploff, &tci);
 152    } else {
 153        pkt->ehdr_buf_len = 0;
 154    }
 155
 156    pkt->tci = tci;
 157
 158    net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff);
 159}
 160
 161void net_rx_pkt_dump(struct NetRxPkt *pkt)
 162{
 163#ifdef NET_RX_PKT_DEBUG
 164    assert(pkt);
 165
 166    printf("RX PKT: tot_len: %d, ehdr_buf_len: %lu, vlan_tag: %d\n",
 167              pkt->tot_len, pkt->ehdr_buf_len, pkt->tci);
 168#endif
 169}
 170
 171void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt,
 172    eth_pkt_types_e packet_type)
 173{
 174    assert(pkt);
 175
 176    pkt->packet_type = packet_type;
 177
 178}
 179
 180eth_pkt_types_e net_rx_pkt_get_packet_type(struct NetRxPkt *pkt)
 181{
 182    assert(pkt);
 183
 184    return pkt->packet_type;
 185}
 186
 187size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt)
 188{
 189    assert(pkt);
 190
 191    return pkt->tot_len;
 192}
 193
 194void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data,
 195                              size_t len)
 196{
 197    const struct iovec iov = {
 198        .iov_base = (void *)data,
 199        .iov_len = len
 200    };
 201
 202    assert(pkt);
 203
 204    eth_get_protocols(&iov, 1, &pkt->isip4, &pkt->isip6,
 205                      &pkt->isudp, &pkt->istcp,
 206                      &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off,
 207                      &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info);
 208}
 209
 210void net_rx_pkt_get_protocols(struct NetRxPkt *pkt,
 211                              bool *isip4, bool *isip6,
 212                              bool *isudp, bool *istcp)
 213{
 214    assert(pkt);
 215
 216    *isip4 = pkt->isip4;
 217    *isip6 = pkt->isip6;
 218    *isudp = pkt->isudp;
 219    *istcp = pkt->istcp;
 220}
 221
 222size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt)
 223{
 224    assert(pkt);
 225    return pkt->l3hdr_off;
 226}
 227
 228size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt)
 229{
 230    assert(pkt);
 231    return pkt->l4hdr_off;
 232}
 233
 234size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt)
 235{
 236    assert(pkt);
 237    return pkt->l5hdr_off;
 238}
 239
 240eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt)
 241{
 242    return &pkt->ip6hdr_info;
 243}
 244
 245eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt)
 246{
 247    return &pkt->ip4hdr_info;
 248}
 249
 250eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt)
 251{
 252    return &pkt->l4hdr_info;
 253}
 254
 255static inline void
 256_net_rx_rss_add_chunk(uint8_t *rss_input, size_t *bytes_written,
 257                      void *ptr, size_t size)
 258{
 259    memcpy(&rss_input[*bytes_written], ptr, size);
 260    trace_net_rx_pkt_rss_add_chunk(ptr, size, *bytes_written);
 261    *bytes_written += size;
 262}
 263
 264static inline void
 265_net_rx_rss_prepare_ip4(uint8_t *rss_input,
 266                        struct NetRxPkt *pkt,
 267                        size_t *bytes_written)
 268{
 269    struct ip_header *ip4_hdr = &pkt->ip4hdr_info.ip4_hdr;
 270
 271    _net_rx_rss_add_chunk(rss_input, bytes_written,
 272                          &ip4_hdr->ip_src, sizeof(uint32_t));
 273
 274    _net_rx_rss_add_chunk(rss_input, bytes_written,
 275                          &ip4_hdr->ip_dst, sizeof(uint32_t));
 276}
 277
 278static inline void
 279_net_rx_rss_prepare_ip6(uint8_t *rss_input,
 280                        struct NetRxPkt *pkt,
 281                        bool ipv6ex, size_t *bytes_written)
 282{
 283    eth_ip6_hdr_info *ip6info = &pkt->ip6hdr_info;
 284
 285    _net_rx_rss_add_chunk(rss_input, bytes_written,
 286           (ipv6ex && ip6info->rss_ex_src_valid) ? &ip6info->rss_ex_src
 287                                                 : &ip6info->ip6_hdr.ip6_src,
 288           sizeof(struct in6_address));
 289
 290    _net_rx_rss_add_chunk(rss_input, bytes_written,
 291           (ipv6ex && ip6info->rss_ex_dst_valid) ? &ip6info->rss_ex_dst
 292                                                 : &ip6info->ip6_hdr.ip6_dst,
 293           sizeof(struct in6_address));
 294}
 295
 296static inline void
 297_net_rx_rss_prepare_tcp(uint8_t *rss_input,
 298                        struct NetRxPkt *pkt,
 299                        size_t *bytes_written)
 300{
 301    struct tcp_header *tcphdr = &pkt->l4hdr_info.hdr.tcp;
 302
 303    _net_rx_rss_add_chunk(rss_input, bytes_written,
 304                          &tcphdr->th_sport, sizeof(uint16_t));
 305
 306    _net_rx_rss_add_chunk(rss_input, bytes_written,
 307                          &tcphdr->th_dport, sizeof(uint16_t));
 308}
 309
 310uint32_t
 311net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt,
 312                         NetRxPktRssType type,
 313                         uint8_t *key)
 314{
 315    uint8_t rss_input[36];
 316    size_t rss_length = 0;
 317    uint32_t rss_hash = 0;
 318    net_toeplitz_key key_data;
 319
 320    switch (type) {
 321    case NetPktRssIpV4:
 322        assert(pkt->isip4);
 323        trace_net_rx_pkt_rss_ip4();
 324        _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
 325        break;
 326    case NetPktRssIpV4Tcp:
 327        assert(pkt->isip4);
 328        assert(pkt->istcp);
 329        trace_net_rx_pkt_rss_ip4_tcp();
 330        _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
 331        _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
 332        break;
 333    case NetPktRssIpV6Tcp:
 334        assert(pkt->isip6);
 335        assert(pkt->istcp);
 336        trace_net_rx_pkt_rss_ip6_tcp();
 337        _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
 338        _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
 339        break;
 340    case NetPktRssIpV6:
 341        assert(pkt->isip6);
 342        trace_net_rx_pkt_rss_ip6();
 343        _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
 344        break;
 345    case NetPktRssIpV6Ex:
 346        assert(pkt->isip6);
 347        trace_net_rx_pkt_rss_ip6_ex();
 348        _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
 349        break;
 350    default:
 351        assert(false);
 352        break;
 353    }
 354
 355    net_toeplitz_key_init(&key_data, key);
 356    net_toeplitz_add(&rss_hash, rss_input, rss_length, &key_data);
 357
 358    trace_net_rx_pkt_rss_hash(rss_length, rss_hash);
 359
 360    return rss_hash;
 361}
 362
 363uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt)
 364{
 365    assert(pkt);
 366
 367    if (pkt->isip4) {
 368        return be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_id);
 369    }
 370
 371    return 0;
 372}
 373
 374bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt)
 375{
 376    assert(pkt);
 377
 378    if (pkt->istcp) {
 379        return TCP_HEADER_FLAGS(&pkt->l4hdr_info.hdr.tcp) & TCP_FLAG_ACK;
 380    }
 381
 382    return false;
 383}
 384
 385bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt)
 386{
 387    assert(pkt);
 388
 389    if (pkt->istcp) {
 390        return pkt->l4hdr_info.has_tcp_data;
 391    }
 392
 393    return false;
 394}
 395
 396struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt)
 397{
 398    assert(pkt);
 399
 400    return pkt->vec;
 401}
 402
 403uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt)
 404{
 405    assert(pkt);
 406
 407    return pkt->vec_len;
 408}
 409
 410void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt,
 411                            struct virtio_net_hdr *vhdr)
 412{
 413    assert(pkt);
 414
 415    memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr);
 416}
 417
 418void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt,
 419    const struct iovec *iov, int iovcnt)
 420{
 421    assert(pkt);
 422
 423    iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr);
 424}
 425
 426bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt)
 427{
 428    assert(pkt);
 429
 430    return pkt->ehdr_buf_len ? true : false;
 431}
 432
 433bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt)
 434{
 435    assert(pkt);
 436
 437    return pkt->has_virt_hdr;
 438}
 439
 440uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt)
 441{
 442    assert(pkt);
 443
 444    return pkt->tci;
 445}
 446
 447bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid)
 448{
 449    uint32_t cntr;
 450    uint16_t csum;
 451    uint32_t csl;
 452
 453    trace_net_rx_pkt_l3_csum_validate_entry();
 454
 455    if (!pkt->isip4) {
 456        trace_net_rx_pkt_l3_csum_validate_not_ip4();
 457        return false;
 458    }
 459
 460    csl = pkt->l4hdr_off - pkt->l3hdr_off;
 461
 462    cntr = net_checksum_add_iov(pkt->vec, pkt->vec_len,
 463                                pkt->l3hdr_off,
 464                                csl, 0);
 465
 466    csum = net_checksum_finish(cntr);
 467
 468    *csum_valid = (csum == 0);
 469
 470    trace_net_rx_pkt_l3_csum_validate_csum(pkt->l3hdr_off, csl,
 471                                           cntr, csum, *csum_valid);
 472
 473    return true;
 474}
 475
 476static uint16_t
 477_net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt)
 478{
 479    uint32_t cntr;
 480    uint16_t csum;
 481    uint16_t csl;
 482    uint32_t cso;
 483
 484    trace_net_rx_pkt_l4_csum_calc_entry();
 485
 486    if (pkt->isip4) {
 487        if (pkt->isudp) {
 488            csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen);
 489            trace_net_rx_pkt_l4_csum_calc_ip4_udp();
 490        } else {
 491            csl = be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_len) -
 492                  IP_HDR_GET_LEN(&pkt->ip4hdr_info.ip4_hdr);
 493            trace_net_rx_pkt_l4_csum_calc_ip4_tcp();
 494        }
 495
 496        cntr = eth_calc_ip4_pseudo_hdr_csum(&pkt->ip4hdr_info.ip4_hdr,
 497                                            csl, &cso);
 498        trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl);
 499    } else {
 500        if (pkt->isudp) {
 501            csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen);
 502            trace_net_rx_pkt_l4_csum_calc_ip6_udp();
 503        } else {
 504            struct ip6_header *ip6hdr = &pkt->ip6hdr_info.ip6_hdr;
 505            size_t full_ip6hdr_len = pkt->l4hdr_off - pkt->l3hdr_off;
 506            size_t ip6opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
 507
 508            csl = be16_to_cpu(ip6hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) -
 509                  ip6opts_len;
 510            trace_net_rx_pkt_l4_csum_calc_ip6_tcp();
 511        }
 512
 513        cntr = eth_calc_ip6_pseudo_hdr_csum(&pkt->ip6hdr_info.ip6_hdr, csl,
 514                                            pkt->ip6hdr_info.l4proto, &cso);
 515        trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl);
 516    }
 517
 518    cntr += net_checksum_add_iov(pkt->vec, pkt->vec_len,
 519                                 pkt->l4hdr_off, csl, cso);
 520
 521    csum = net_checksum_finish_nozero(cntr);
 522
 523    trace_net_rx_pkt_l4_csum_calc_csum(pkt->l4hdr_off, csl, cntr, csum);
 524
 525    return csum;
 526}
 527
 528bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid)
 529{
 530    uint16_t csum;
 531
 532    trace_net_rx_pkt_l4_csum_validate_entry();
 533
 534    if (!pkt->istcp && !pkt->isudp) {
 535        trace_net_rx_pkt_l4_csum_validate_not_xxp();
 536        return false;
 537    }
 538
 539    if (pkt->isudp && (pkt->l4hdr_info.hdr.udp.uh_sum == 0)) {
 540        trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum();
 541        return false;
 542    }
 543
 544    if (pkt->isip4 && pkt->ip4hdr_info.fragment) {
 545        trace_net_rx_pkt_l4_csum_validate_ip4_fragment();
 546        return false;
 547    }
 548
 549    csum = _net_rx_pkt_calc_l4_csum(pkt);
 550
 551    *csum_valid = ((csum == 0) || (csum == 0xFFFF));
 552
 553    trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid);
 554
 555    return true;
 556}
 557
 558bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt)
 559{
 560    uint16_t csum = 0;
 561    uint32_t l4_cso;
 562
 563    trace_net_rx_pkt_l4_csum_fix_entry();
 564
 565    if (pkt->istcp) {
 566        l4_cso = offsetof(struct tcp_header, th_sum);
 567        trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso);
 568    } else if (pkt->isudp) {
 569        if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) {
 570            trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum();
 571            return false;
 572        }
 573        l4_cso = offsetof(struct udp_header, uh_sum);
 574        trace_net_rx_pkt_l4_csum_fix_udp(l4_cso);
 575    } else {
 576        trace_net_rx_pkt_l4_csum_fix_not_xxp();
 577        return false;
 578    }
 579
 580    if (pkt->isip4 && pkt->ip4hdr_info.fragment) {
 581            trace_net_rx_pkt_l4_csum_fix_ip4_fragment();
 582            return false;
 583    }
 584
 585    /* Set zero to checksum word */
 586    iov_from_buf(pkt->vec, pkt->vec_len,
 587                 pkt->l4hdr_off + l4_cso,
 588                 &csum, sizeof(csum));
 589
 590    /* Calculate L4 checksum */
 591    csum = cpu_to_be16(_net_rx_pkt_calc_l4_csum(pkt));
 592
 593    /* Set calculated checksum to checksum word */
 594    iov_from_buf(pkt->vec, pkt->vec_len,
 595                 pkt->l4hdr_off + l4_cso,
 596                 &csum, sizeof(csum));
 597
 598    trace_net_rx_pkt_l4_csum_fix_csum(pkt->l4hdr_off + l4_cso, csum);
 599
 600    return true;
 601}
 602