qemu/hw/net/e1000e_core.c
<<
>>
Prefs
   1/*
   2* Core code for QEMU e1000e emulation
   3*
   4* Software developer's manuals:
   5* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf
   6*
   7* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com)
   8* Developed by Daynix Computing LTD (http://www.daynix.com)
   9*
  10* Authors:
  11* Dmitry Fleytman <dmitry@daynix.com>
  12* Leonid Bloch <leonid@daynix.com>
  13* Yan Vugenfirer <yan@daynix.com>
  14*
  15* Based on work done by:
  16* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
  17* Copyright (c) 2008 Qumranet
  18* Based on work done by:
  19* Copyright (c) 2007 Dan Aloni
  20* Copyright (c) 2004 Antony T Curtis
  21*
  22* This library is free software; you can redistribute it and/or
  23* modify it under the terms of the GNU Lesser General Public
  24* License as published by the Free Software Foundation; either
  25* version 2.1 of the License, or (at your option) any later version.
  26*
  27* This library is distributed in the hope that it will be useful,
  28* but WITHOUT ANY WARRANTY; without even the implied warranty of
  29* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  30* Lesser General Public License for more details.
  31*
  32* You should have received a copy of the GNU Lesser General Public
  33* License along with this library; if not, see <http://www.gnu.org/licenses/>.
  34*/
  35
  36#include "qemu/osdep.h"
  37#include "qemu/log.h"
  38#include "net/net.h"
  39#include "net/tap.h"
  40#include "hw/pci/msi.h"
  41#include "hw/pci/msix.h"
  42#include "sysemu/runstate.h"
  43
  44#include "net_tx_pkt.h"
  45#include "net_rx_pkt.h"
  46
  47#include "e1000x_common.h"
  48#include "e1000e_core.h"
  49
  50#include "trace.h"
  51
   52#define E1000E_MIN_XITR     (500) /* No more than 7813 interrupts per
  53                                     second according to spec 10.2.4.2 */
  54#define E1000E_MAX_TX_FRAGS (64)
  55
  56static inline void
  57e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val);
  58
  59static inline void
  60e1000e_process_ts_option(E1000ECore *core, struct e1000_tx_desc *dp)
  61{
  62    if (le32_to_cpu(dp->upper.data) & E1000_TXD_EXTCMD_TSTAMP) {
  63        trace_e1000e_wrn_no_ts_support();
  64    }
  65}
  66
  67static inline void
  68e1000e_process_snap_option(E1000ECore *core, uint32_t cmd_and_length)
  69{
  70    if (cmd_and_length & E1000_TXD_CMD_SNAP) {
  71        trace_e1000e_wrn_no_snap_support();
  72    }
  73}
  74
  75static inline void
  76e1000e_raise_legacy_irq(E1000ECore *core)
  77{
  78    trace_e1000e_irq_legacy_notify(true);
  79    e1000x_inc_reg_if_not_full(core->mac, IAC);
  80    pci_set_irq(core->owner, 1);
  81}
  82
  83static inline void
  84e1000e_lower_legacy_irq(E1000ECore *core)
  85{
  86    trace_e1000e_irq_legacy_notify(false);
  87    pci_set_irq(core->owner, 0);
  88}
  89
  90static inline void
  91e1000e_intrmgr_rearm_timer(E1000IntrDelayTimer *timer)
  92{
  93    int64_t delay_ns = (int64_t) timer->core->mac[timer->delay_reg] *
  94                                 timer->delay_resolution_ns;
  95
  96    trace_e1000e_irq_rearm_timer(timer->delay_reg << 2, delay_ns);
  97
  98    timer_mod(timer->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + delay_ns);
  99
 100    timer->running = true;
 101}
 102
 103static void
 104e1000e_intmgr_timer_resume(E1000IntrDelayTimer *timer)
 105{
 106    if (timer->running) {
 107        e1000e_intrmgr_rearm_timer(timer);
 108    }
 109}
 110
 111static void
 112e1000e_intmgr_timer_pause(E1000IntrDelayTimer *timer)
 113{
 114    if (timer->running) {
 115        timer_del(timer->timer);
 116    }
 117}
 118
 119static inline void
 120e1000e_intrmgr_stop_timer(E1000IntrDelayTimer *timer)
 121{
 122    if (timer->running) {
 123        timer_del(timer->timer);
 124        timer->running = false;
 125    }
 126}
 127
 128static inline void
 129e1000e_intrmgr_fire_delayed_interrupts(E1000ECore *core)
 130{
 131    trace_e1000e_irq_fire_delayed_interrupts();
 132    e1000e_set_interrupt_cause(core, 0);
 133}
 134
 135static void
 136e1000e_intrmgr_on_timer(void *opaque)
 137{
 138    E1000IntrDelayTimer *timer = opaque;
 139
 140    trace_e1000e_irq_throttling_timer(timer->delay_reg << 2);
 141
 142    timer->running = false;
 143    e1000e_intrmgr_fire_delayed_interrupts(timer->core);
 144}
 145
 146static void
 147e1000e_intrmgr_on_throttling_timer(void *opaque)
 148{
 149    E1000IntrDelayTimer *timer = opaque;
 150
 151    assert(!msix_enabled(timer->core->owner));
 152
 153    timer->running = false;
 154
 155    if (!timer->core->itr_intr_pending) {
 156        trace_e1000e_irq_throttling_no_pending_interrupts();
 157        return;
 158    }
 159
 160    if (msi_enabled(timer->core->owner)) {
 161        trace_e1000e_irq_msi_notify_postponed();
 162        e1000e_set_interrupt_cause(timer->core, 0);
 163    } else {
 164        trace_e1000e_irq_legacy_notify_postponed();
 165        e1000e_set_interrupt_cause(timer->core, 0);
 166    }
 167}
 168
 169static void
 170e1000e_intrmgr_on_msix_throttling_timer(void *opaque)
 171{
 172    E1000IntrDelayTimer *timer = opaque;
 173    int idx = timer - &timer->core->eitr[0];
 174
 175    assert(msix_enabled(timer->core->owner));
 176
 177    timer->running = false;
 178
 179    if (!timer->core->eitr_intr_pending[idx]) {
 180        trace_e1000e_irq_throttling_no_pending_vec(idx);
 181        return;
 182    }
 183
 184    trace_e1000e_irq_msix_notify_postponed_vec(idx);
 185    msix_notify(timer->core->owner, idx);
 186}
 187
 188static void
 189e1000e_intrmgr_initialize_all_timers(E1000ECore *core, bool create)
 190{
 191    int i;
 192
 193    core->radv.delay_reg = RADV;
 194    core->rdtr.delay_reg = RDTR;
 195    core->raid.delay_reg = RAID;
 196    core->tadv.delay_reg = TADV;
 197    core->tidv.delay_reg = TIDV;
 198
 199    core->radv.delay_resolution_ns = E1000_INTR_DELAY_NS_RES;
 200    core->rdtr.delay_resolution_ns = E1000_INTR_DELAY_NS_RES;
 201    core->raid.delay_resolution_ns = E1000_INTR_DELAY_NS_RES;
 202    core->tadv.delay_resolution_ns = E1000_INTR_DELAY_NS_RES;
 203    core->tidv.delay_resolution_ns = E1000_INTR_DELAY_NS_RES;
 204
 205    core->radv.core = core;
 206    core->rdtr.core = core;
 207    core->raid.core = core;
 208    core->tadv.core = core;
 209    core->tidv.core = core;
 210
 211    core->itr.core = core;
 212    core->itr.delay_reg = ITR;
 213    core->itr.delay_resolution_ns = E1000_INTR_THROTTLING_NS_RES;
 214
 215    for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) {
 216        core->eitr[i].core = core;
 217        core->eitr[i].delay_reg = EITR + i;
 218        core->eitr[i].delay_resolution_ns = E1000_INTR_THROTTLING_NS_RES;
 219    }
 220
 221    if (!create) {
 222        return;
 223    }
 224
 225    core->radv.timer =
 226        timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->radv);
 227    core->rdtr.timer =
 228        timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->rdtr);
 229    core->raid.timer =
 230        timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->raid);
 231
 232    core->tadv.timer =
 233        timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->tadv);
 234    core->tidv.timer =
 235        timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->tidv);
 236
 237    core->itr.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
 238                                   e1000e_intrmgr_on_throttling_timer,
 239                                   &core->itr);
 240
 241    for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) {
 242        core->eitr[i].timer =
 243            timer_new_ns(QEMU_CLOCK_VIRTUAL,
 244                         e1000e_intrmgr_on_msix_throttling_timer,
 245                         &core->eitr[i]);
 246    }
 247}
 248
 249static inline void
 250e1000e_intrmgr_stop_delay_timers(E1000ECore *core)
 251{
 252    e1000e_intrmgr_stop_timer(&core->radv);
 253    e1000e_intrmgr_stop_timer(&core->rdtr);
 254    e1000e_intrmgr_stop_timer(&core->raid);
 255    e1000e_intrmgr_stop_timer(&core->tidv);
 256    e1000e_intrmgr_stop_timer(&core->tadv);
 257}
 258
 259static bool
 260e1000e_intrmgr_delay_rx_causes(E1000ECore *core, uint32_t *causes)
 261{
 262    uint32_t delayable_causes;
 263    uint32_t rdtr = core->mac[RDTR];
 264    uint32_t radv = core->mac[RADV];
 265    uint32_t raid = core->mac[RAID];
 266
 267    if (msix_enabled(core->owner)) {
 268        return false;
 269    }
 270
 271    delayable_causes = E1000_ICR_RXQ0 |
 272                       E1000_ICR_RXQ1 |
 273                       E1000_ICR_RXT0;
 274
 275    if (!(core->mac[RFCTL] & E1000_RFCTL_ACK_DIS)) {
 276        delayable_causes |= E1000_ICR_ACK;
 277    }
 278
 279    /* Clean up all causes that may be delayed */
 280    core->delayed_causes |= *causes & delayable_causes;
 281    *causes &= ~delayable_causes;
 282
 283    /* Check if delayed RX interrupts disabled by client
 284       or if there are causes that cannot be delayed */
 285    if ((rdtr == 0) || (*causes != 0)) {
 286        return false;
 287    }
 288
 289    /* Check if delayed RX ACK interrupts disabled by client
 290       and there is an ACK packet received */
 291    if ((raid == 0) && (core->delayed_causes & E1000_ICR_ACK)) {
 292        return false;
 293    }
 294
 295    /* All causes delayed */
 296    e1000e_intrmgr_rearm_timer(&core->rdtr);
 297
 298    if (!core->radv.running && (radv != 0)) {
 299        e1000e_intrmgr_rearm_timer(&core->radv);
 300    }
 301
 302    if (!core->raid.running && (core->delayed_causes & E1000_ICR_ACK)) {
 303        e1000e_intrmgr_rearm_timer(&core->raid);
 304    }
 305
 306    return true;
 307}
 308
 309static bool
 310e1000e_intrmgr_delay_tx_causes(E1000ECore *core, uint32_t *causes)
 311{
 312    static const uint32_t delayable_causes = E1000_ICR_TXQ0 |
 313                                             E1000_ICR_TXQ1 |
 314                                             E1000_ICR_TXQE |
 315                                             E1000_ICR_TXDW;
 316
 317    if (msix_enabled(core->owner)) {
 318        return false;
 319    }
 320
 321    /* Clean up all causes that may be delayed */
 322    core->delayed_causes |= *causes & delayable_causes;
 323    *causes &= ~delayable_causes;
 324
 325    /* If there are causes that cannot be delayed */
 326    if (*causes != 0) {
 327        return false;
 328    }
 329
 330    /* All causes delayed */
 331    e1000e_intrmgr_rearm_timer(&core->tidv);
 332
 333    if (!core->tadv.running && (core->mac[TADV] != 0)) {
 334        e1000e_intrmgr_rearm_timer(&core->tadv);
 335    }
 336
 337    return true;
 338}
 339
 340static uint32_t
 341e1000e_intmgr_collect_delayed_causes(E1000ECore *core)
 342{
 343    uint32_t res;
 344
 345    if (msix_enabled(core->owner)) {
 346        assert(core->delayed_causes == 0);
 347        return 0;
 348    }
 349
 350    res = core->delayed_causes;
 351    core->delayed_causes = 0;
 352
 353    e1000e_intrmgr_stop_delay_timers(core);
 354
 355    return res;
 356}
 357
 358static void
 359e1000e_intrmgr_fire_all_timers(E1000ECore *core)
 360{
 361    int i;
 362    uint32_t val = e1000e_intmgr_collect_delayed_causes(core);
 363
 364    trace_e1000e_irq_adding_delayed_causes(val, core->mac[ICR]);
 365    core->mac[ICR] |= val;
 366
 367    if (core->itr.running) {
 368        timer_del(core->itr.timer);
 369        e1000e_intrmgr_on_throttling_timer(&core->itr);
 370    }
 371
 372    for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) {
 373        if (core->eitr[i].running) {
 374            timer_del(core->eitr[i].timer);
 375            e1000e_intrmgr_on_msix_throttling_timer(&core->eitr[i]);
 376        }
 377    }
 378}
 379
 380static void
 381e1000e_intrmgr_resume(E1000ECore *core)
 382{
 383    int i;
 384
 385    e1000e_intmgr_timer_resume(&core->radv);
 386    e1000e_intmgr_timer_resume(&core->rdtr);
 387    e1000e_intmgr_timer_resume(&core->raid);
 388    e1000e_intmgr_timer_resume(&core->tidv);
 389    e1000e_intmgr_timer_resume(&core->tadv);
 390
 391    e1000e_intmgr_timer_resume(&core->itr);
 392
 393    for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) {
 394        e1000e_intmgr_timer_resume(&core->eitr[i]);
 395    }
 396}
 397
 398static void
 399e1000e_intrmgr_pause(E1000ECore *core)
 400{
 401    int i;
 402
 403    e1000e_intmgr_timer_pause(&core->radv);
 404    e1000e_intmgr_timer_pause(&core->rdtr);
 405    e1000e_intmgr_timer_pause(&core->raid);
 406    e1000e_intmgr_timer_pause(&core->tidv);
 407    e1000e_intmgr_timer_pause(&core->tadv);
 408
 409    e1000e_intmgr_timer_pause(&core->itr);
 410
 411    for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) {
 412        e1000e_intmgr_timer_pause(&core->eitr[i]);
 413    }
 414}
 415
 416static void
 417e1000e_intrmgr_reset(E1000ECore *core)
 418{
 419    int i;
 420
 421    core->delayed_causes = 0;
 422
 423    e1000e_intrmgr_stop_delay_timers(core);
 424
 425    e1000e_intrmgr_stop_timer(&core->itr);
 426
 427    for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) {
 428        e1000e_intrmgr_stop_timer(&core->eitr[i]);
 429    }
 430}
 431
 432static void
 433e1000e_intrmgr_pci_unint(E1000ECore *core)
 434{
 435    int i;
 436
 437    timer_free(core->radv.timer);
 438    timer_free(core->rdtr.timer);
 439    timer_free(core->raid.timer);
 440
 441    timer_free(core->tadv.timer);
 442    timer_free(core->tidv.timer);
 443
 444    timer_free(core->itr.timer);
 445
 446    for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) {
 447        timer_free(core->eitr[i].timer);
 448    }
 449}
 450
 451static void
 452e1000e_intrmgr_pci_realize(E1000ECore *core)
 453{
 454    e1000e_intrmgr_initialize_all_timers(core, true);
 455}
 456
 457static inline bool
 458e1000e_rx_csum_enabled(E1000ECore *core)
 459{
 460    return (core->mac[RXCSUM] & E1000_RXCSUM_PCSD) ? false : true;
 461}
 462
 463static inline bool
 464e1000e_rx_use_legacy_descriptor(E1000ECore *core)
 465{
 466    return (core->mac[RFCTL] & E1000_RFCTL_EXTEN) ? false : true;
 467}
 468
 469static inline bool
 470e1000e_rx_use_ps_descriptor(E1000ECore *core)
 471{
 472    return !e1000e_rx_use_legacy_descriptor(core) &&
 473           (core->mac[RCTL] & E1000_RCTL_DTYP_PS);
 474}
 475
 476static inline bool
 477e1000e_rss_enabled(E1000ECore *core)
 478{
 479    return E1000_MRQC_ENABLED(core->mac[MRQC]) &&
 480           !e1000e_rx_csum_enabled(core) &&
 481           !e1000e_rx_use_legacy_descriptor(core);
 482}
 483
 484typedef struct E1000E_RSSInfo_st {
 485    bool enabled;
 486    uint32_t hash;
 487    uint32_t queue;
 488    uint32_t type;
 489} E1000E_RSSInfo;
 490
 491static uint32_t
 492e1000e_rss_get_hash_type(E1000ECore *core, struct NetRxPkt *pkt)
 493{
 494    bool isip4, isip6, isudp, istcp;
 495
 496    assert(e1000e_rss_enabled(core));
 497
 498    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
 499
 500    if (isip4) {
 501        bool fragment = net_rx_pkt_get_ip4_info(pkt)->fragment;
 502
 503        trace_e1000e_rx_rss_ip4(fragment, istcp, core->mac[MRQC],
 504                                E1000_MRQC_EN_TCPIPV4(core->mac[MRQC]),
 505                                E1000_MRQC_EN_IPV4(core->mac[MRQC]));
 506
 507        if (!fragment && istcp && E1000_MRQC_EN_TCPIPV4(core->mac[MRQC])) {
 508            return E1000_MRQ_RSS_TYPE_IPV4TCP;
 509        }
 510
 511        if (E1000_MRQC_EN_IPV4(core->mac[MRQC])) {
 512            return E1000_MRQ_RSS_TYPE_IPV4;
 513        }
 514    } else if (isip6) {
 515        eth_ip6_hdr_info *ip6info = net_rx_pkt_get_ip6_info(pkt);
 516
 517        bool ex_dis = core->mac[RFCTL] & E1000_RFCTL_IPV6_EX_DIS;
 518        bool new_ex_dis = core->mac[RFCTL] & E1000_RFCTL_NEW_IPV6_EXT_DIS;
 519
 520        /*
 521         * Following two traces must not be combined because resulting
 522         * event will have 11 arguments totally and some trace backends
 523         * (at least "ust") have limitation of maximum 10 arguments per
 524         * event. Events with more arguments fail to compile for
 525         * backends like these.
 526         */
 527        trace_e1000e_rx_rss_ip6_rfctl(core->mac[RFCTL]);
 528        trace_e1000e_rx_rss_ip6(ex_dis, new_ex_dis, istcp,
 529                                ip6info->has_ext_hdrs,
 530                                ip6info->rss_ex_dst_valid,
 531                                ip6info->rss_ex_src_valid,
 532                                core->mac[MRQC],
 533                                E1000_MRQC_EN_TCPIPV6(core->mac[MRQC]),
 534                                E1000_MRQC_EN_IPV6EX(core->mac[MRQC]),
 535                                E1000_MRQC_EN_IPV6(core->mac[MRQC]));
 536
 537        if ((!ex_dis || !ip6info->has_ext_hdrs) &&
 538            (!new_ex_dis || !(ip6info->rss_ex_dst_valid ||
 539                              ip6info->rss_ex_src_valid))) {
 540
 541            if (istcp && !ip6info->fragment &&
 542                E1000_MRQC_EN_TCPIPV6(core->mac[MRQC])) {
 543                return E1000_MRQ_RSS_TYPE_IPV6TCP;
 544            }
 545
 546            if (E1000_MRQC_EN_IPV6EX(core->mac[MRQC])) {
 547                return E1000_MRQ_RSS_TYPE_IPV6EX;
 548            }
 549
 550        }
 551
 552        if (E1000_MRQC_EN_IPV6(core->mac[MRQC])) {
 553            return E1000_MRQ_RSS_TYPE_IPV6;
 554        }
 555
 556    }
 557
 558    return E1000_MRQ_RSS_TYPE_NONE;
 559}
 560
 561static uint32_t
 562e1000e_rss_calc_hash(E1000ECore *core,
 563                     struct NetRxPkt *pkt,
 564                     E1000E_RSSInfo *info)
 565{
 566    NetRxPktRssType type;
 567
 568    assert(e1000e_rss_enabled(core));
 569
 570    switch (info->type) {
 571    case E1000_MRQ_RSS_TYPE_IPV4:
 572        type = NetPktRssIpV4;
 573        break;
 574    case E1000_MRQ_RSS_TYPE_IPV4TCP:
 575        type = NetPktRssIpV4Tcp;
 576        break;
 577    case E1000_MRQ_RSS_TYPE_IPV6TCP:
 578        type = NetPktRssIpV6TcpEx;
 579        break;
 580    case E1000_MRQ_RSS_TYPE_IPV6:
 581        type = NetPktRssIpV6;
 582        break;
 583    case E1000_MRQ_RSS_TYPE_IPV6EX:
 584        type = NetPktRssIpV6Ex;
 585        break;
 586    default:
 587        assert(false);
 588        return 0;
 589    }
 590
 591    return net_rx_pkt_calc_rss_hash(pkt, type, (uint8_t *) &core->mac[RSSRK]);
 592}
 593
 594static void
 595e1000e_rss_parse_packet(E1000ECore *core,
 596                        struct NetRxPkt *pkt,
 597                        E1000E_RSSInfo *info)
 598{
 599    trace_e1000e_rx_rss_started();
 600
 601    if (!e1000e_rss_enabled(core)) {
 602        info->enabled = false;
 603        info->hash = 0;
 604        info->queue = 0;
 605        info->type = 0;
 606        trace_e1000e_rx_rss_disabled();
 607        return;
 608    }
 609
 610    info->enabled = true;
 611
 612    info->type = e1000e_rss_get_hash_type(core, pkt);
 613
 614    trace_e1000e_rx_rss_type(info->type);
 615
 616    if (info->type == E1000_MRQ_RSS_TYPE_NONE) {
 617        info->hash = 0;
 618        info->queue = 0;
 619        return;
 620    }
 621
 622    info->hash = e1000e_rss_calc_hash(core, pkt, info);
 623    info->queue = E1000_RSS_QUEUE(&core->mac[RETA], info->hash);
 624}
 625
 626static void
 627e1000e_setup_tx_offloads(E1000ECore *core, struct e1000e_tx *tx)
 628{
 629    if (tx->props.tse && tx->cptse) {
 630        net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->props.mss);
 631        net_tx_pkt_update_ip_checksums(tx->tx_pkt);
 632        e1000x_inc_reg_if_not_full(core->mac, TSCTC);
 633        return;
 634    }
 635
 636    if (tx->sum_needed & E1000_TXD_POPTS_TXSM) {
 637        net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0);
 638    }
 639
 640    if (tx->sum_needed & E1000_TXD_POPTS_IXSM) {
 641        net_tx_pkt_update_ip_hdr_checksum(tx->tx_pkt);
 642    }
 643}
 644
 645static bool
 646e1000e_tx_pkt_send(E1000ECore *core, struct e1000e_tx *tx, int queue_index)
 647{
 648    int target_queue = MIN(core->max_queue_num, queue_index);
 649    NetClientState *queue = qemu_get_subqueue(core->owner_nic, target_queue);
 650
 651    e1000e_setup_tx_offloads(core, tx);
 652
 653    net_tx_pkt_dump(tx->tx_pkt);
 654
 655    if ((core->phy[0][PHY_CTRL] & MII_CR_LOOPBACK) ||
 656        ((core->mac[RCTL] & E1000_RCTL_LBM_MAC) == E1000_RCTL_LBM_MAC)) {
 657        return net_tx_pkt_send_loopback(tx->tx_pkt, queue);
 658    } else {
 659        return net_tx_pkt_send(tx->tx_pkt, queue);
 660    }
 661}
 662
 663static void
 664e1000e_on_tx_done_update_stats(E1000ECore *core, struct NetTxPkt *tx_pkt)
 665{
 666    static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
 667                                    PTC1023, PTC1522 };
 668
 669    size_t tot_len = net_tx_pkt_get_total_len(tx_pkt);
 670
 671    e1000x_increase_size_stats(core->mac, PTCregs, tot_len);
 672    e1000x_inc_reg_if_not_full(core->mac, TPT);
 673    e1000x_grow_8reg_if_not_full(core->mac, TOTL, tot_len);
 674
 675    switch (net_tx_pkt_get_packet_type(tx_pkt)) {
 676    case ETH_PKT_BCAST:
 677        e1000x_inc_reg_if_not_full(core->mac, BPTC);
 678        break;
 679    case ETH_PKT_MCAST:
 680        e1000x_inc_reg_if_not_full(core->mac, MPTC);
 681        break;
 682    case ETH_PKT_UCAST:
 683        break;
 684    default:
 685        g_assert_not_reached();
 686    }
 687
 688    core->mac[GPTC] = core->mac[TPT];
 689    core->mac[GOTCL] = core->mac[TOTL];
 690    core->mac[GOTCH] = core->mac[TOTH];
 691}
 692
 693static void
 694e1000e_process_tx_desc(E1000ECore *core,
 695                       struct e1000e_tx *tx,
 696                       struct e1000_tx_desc *dp,
 697                       int queue_index)
 698{
 699    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
 700    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
 701    unsigned int split_size = txd_lower & 0xffff;
 702    uint64_t addr;
 703    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
 704    bool eop = txd_lower & E1000_TXD_CMD_EOP;
 705
 706    if (dtype == E1000_TXD_CMD_DEXT) { /* context descriptor */
 707        e1000x_read_tx_ctx_descr(xp, &tx->props);
 708        e1000e_process_snap_option(core, le32_to_cpu(xp->cmd_and_length));
 709        return;
 710    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
 711        /* data descriptor */
 712        tx->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
 713        tx->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
 714        e1000e_process_ts_option(core, dp);
 715    } else {
 716        /* legacy descriptor */
 717        e1000e_process_ts_option(core, dp);
 718        tx->cptse = 0;
 719    }
 720
 721    addr = le64_to_cpu(dp->buffer_addr);
 722
 723    if (!tx->skip_cp) {
 724        if (!net_tx_pkt_add_raw_fragment(tx->tx_pkt, addr, split_size)) {
 725            tx->skip_cp = true;
 726        }
 727    }
 728
 729    if (eop) {
 730        if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
 731            if (e1000x_vlan_enabled(core->mac) &&
 732                e1000x_is_vlan_txd(txd_lower)) {
 733                net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt,
 734                    le16_to_cpu(dp->upper.fields.special), core->vet);
 735            }
 736            if (e1000e_tx_pkt_send(core, tx, queue_index)) {
 737                e1000e_on_tx_done_update_stats(core, tx->tx_pkt);
 738            }
 739        }
 740
 741        tx->skip_cp = false;
 742        net_tx_pkt_reset(tx->tx_pkt);
 743
 744        tx->sum_needed = 0;
 745        tx->cptse = 0;
 746    }
 747}
 748
 749static inline uint32_t
 750e1000e_tx_wb_interrupt_cause(E1000ECore *core, int queue_idx)
 751{
 752    if (!msix_enabled(core->owner)) {
 753        return E1000_ICR_TXDW;
 754    }
 755
 756    return (queue_idx == 0) ? E1000_ICR_TXQ0 : E1000_ICR_TXQ1;
 757}
 758
 759static inline uint32_t
 760e1000e_rx_wb_interrupt_cause(E1000ECore *core, int queue_idx,
 761                             bool min_threshold_hit)
 762{
 763    if (!msix_enabled(core->owner)) {
 764        return E1000_ICS_RXT0 | (min_threshold_hit ? E1000_ICS_RXDMT0 : 0);
 765    }
 766
 767    return (queue_idx == 0) ? E1000_ICR_RXQ0 : E1000_ICR_RXQ1;
 768}
 769
 770static uint32_t
 771e1000e_txdesc_writeback(E1000ECore *core, dma_addr_t base,
 772                        struct e1000_tx_desc *dp, bool *ide, int queue_idx)
 773{
 774    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
 775
 776    if (!(txd_lower & E1000_TXD_CMD_RS) &&
 777        !(core->mac[IVAR] & E1000_IVAR_TX_INT_EVERY_WB)) {
 778        return 0;
 779    }
 780
 781    *ide = (txd_lower & E1000_TXD_CMD_IDE) ? true : false;
 782
 783    txd_upper = le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD;
 784
 785    dp->upper.data = cpu_to_le32(txd_upper);
 786    pci_dma_write(core->owner, base + ((char *)&dp->upper - (char *)dp),
 787                  &dp->upper, sizeof(dp->upper));
 788    return e1000e_tx_wb_interrupt_cause(core, queue_idx);
 789}
 790
 791typedef struct E1000E_RingInfo_st {
 792    int dbah;
 793    int dbal;
 794    int dlen;
 795    int dh;
 796    int dt;
 797    int idx;
 798} E1000E_RingInfo;
 799
 800static inline bool
 801e1000e_ring_empty(E1000ECore *core, const E1000E_RingInfo *r)
 802{
 803    return core->mac[r->dh] == core->mac[r->dt] ||
 804                core->mac[r->dt] >= core->mac[r->dlen] / E1000_RING_DESC_LEN;
 805}
 806
 807static inline uint64_t
 808e1000e_ring_base(E1000ECore *core, const E1000E_RingInfo *r)
 809{
 810    uint64_t bah = core->mac[r->dbah];
 811    uint64_t bal = core->mac[r->dbal];
 812
 813    return (bah << 32) + bal;
 814}
 815
 816static inline uint64_t
 817e1000e_ring_head_descr(E1000ECore *core, const E1000E_RingInfo *r)
 818{
 819    return e1000e_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh];
 820}
 821
 822static inline void
 823e1000e_ring_advance(E1000ECore *core, const E1000E_RingInfo *r, uint32_t count)
 824{
 825    core->mac[r->dh] += count;
 826
 827    if (core->mac[r->dh] * E1000_RING_DESC_LEN >= core->mac[r->dlen]) {
 828        core->mac[r->dh] = 0;
 829    }
 830}
 831
 832static inline uint32_t
 833e1000e_ring_free_descr_num(E1000ECore *core, const E1000E_RingInfo *r)
 834{
 835    trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen],
 836                                 core->mac[r->dh],  core->mac[r->dt]);
 837
 838    if (core->mac[r->dh] <= core->mac[r->dt]) {
 839        return core->mac[r->dt] - core->mac[r->dh];
 840    }
 841
 842    if (core->mac[r->dh] > core->mac[r->dt]) {
 843        return core->mac[r->dlen] / E1000_RING_DESC_LEN +
 844               core->mac[r->dt] - core->mac[r->dh];
 845    }
 846
 847    g_assert_not_reached();
 848    return 0;
 849}
 850
 851static inline bool
 852e1000e_ring_enabled(E1000ECore *core, const E1000E_RingInfo *r)
 853{
 854    return core->mac[r->dlen] > 0;
 855}
 856
 857static inline uint32_t
 858e1000e_ring_len(E1000ECore *core, const E1000E_RingInfo *r)
 859{
 860    return core->mac[r->dlen];
 861}
 862
 863typedef struct E1000E_TxRing_st {
 864    const E1000E_RingInfo *i;
 865    struct e1000e_tx *tx;
 866} E1000E_TxRing;
 867
 868static inline int
 869e1000e_mq_queue_idx(int base_reg_idx, int reg_idx)
 870{
 871    return (reg_idx - base_reg_idx) / (0x100 >> 2);
 872}
 873
 874static inline void
 875e1000e_tx_ring_init(E1000ECore *core, E1000E_TxRing *txr, int idx)
 876{
 877    static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = {
 878        { TDBAH,  TDBAL,  TDLEN,  TDH,  TDT, 0 },
 879        { TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 }
 880    };
 881
 882    assert(idx < ARRAY_SIZE(i));
 883
 884    txr->i     = &i[idx];
 885    txr->tx    = &core->tx[idx];
 886}
 887
 888typedef struct E1000E_RxRing_st {
 889    const E1000E_RingInfo *i;
 890} E1000E_RxRing;
 891
 892static inline void
 893e1000e_rx_ring_init(E1000ECore *core, E1000E_RxRing *rxr, int idx)
 894{
 895    static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = {
 896        { RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 },
 897        { RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 }
 898    };
 899
 900    assert(idx < ARRAY_SIZE(i));
 901
 902    rxr->i      = &i[idx];
 903}
 904
 905static void
 906e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr)
 907{
 908    dma_addr_t base;
 909    struct e1000_tx_desc desc;
 910    bool ide = false;
 911    const E1000E_RingInfo *txi = txr->i;
 912    uint32_t cause = E1000_ICS_TXQE;
 913
 914    if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
 915        trace_e1000e_tx_disabled();
 916        return;
 917    }
 918
 919    while (!e1000e_ring_empty(core, txi)) {
 920        base = e1000e_ring_head_descr(core, txi);
 921
 922        pci_dma_read(core->owner, base, &desc, sizeof(desc));
 923
 924        trace_e1000e_tx_descr((void *)(intptr_t)desc.buffer_addr,
 925                              desc.lower.data, desc.upper.data);
 926
 927        e1000e_process_tx_desc(core, txr->tx, &desc, txi->idx);
 928        cause |= e1000e_txdesc_writeback(core, base, &desc, &ide, txi->idx);
 929
 930        e1000e_ring_advance(core, txi, 1);
 931    }
 932
 933    if (!ide || !e1000e_intrmgr_delay_tx_causes(core, &cause)) {
 934        e1000e_set_interrupt_cause(core, cause);
 935    }
 936}
 937
 938static bool
 939e1000e_has_rxbufs(E1000ECore *core, const E1000E_RingInfo *r,
 940                  size_t total_size)
 941{
 942    uint32_t bufs = e1000e_ring_free_descr_num(core, r);
 943
 944    trace_e1000e_rx_has_buffers(r->idx, bufs, total_size,
 945                                core->rx_desc_buf_size);
 946
 947    return total_size <= bufs / (core->rx_desc_len / E1000_MIN_RX_DESC_LEN) *
 948                         core->rx_desc_buf_size;
 949}
 950
 951void
 952e1000e_start_recv(E1000ECore *core)
 953{
 954    int i;
 955
 956    trace_e1000e_rx_start_recv();
 957
 958    for (i = 0; i <= core->max_queue_num; i++) {
 959        qemu_flush_queued_packets(qemu_get_subqueue(core->owner_nic, i));
 960    }
 961}
 962
 963bool
 964e1000e_can_receive(E1000ECore *core)
 965{
 966    int i;
 967
 968    if (!e1000x_rx_ready(core->owner, core->mac)) {
 969        return false;
 970    }
 971
 972    for (i = 0; i < E1000E_NUM_QUEUES; i++) {
 973        E1000E_RxRing rxr;
 974
 975        e1000e_rx_ring_init(core, &rxr, i);
 976        if (e1000e_ring_enabled(core, rxr.i) &&
 977            e1000e_has_rxbufs(core, rxr.i, 1)) {
 978            trace_e1000e_rx_can_recv();
 979            return true;
 980        }
 981    }
 982
 983    trace_e1000e_rx_can_recv_rings_full();
 984    return false;
 985}
 986
 987ssize_t
 988e1000e_receive(E1000ECore *core, const uint8_t *buf, size_t size)
 989{
 990    const struct iovec iov = {
 991        .iov_base = (uint8_t *)buf,
 992        .iov_len = size
 993    };
 994
 995    return e1000e_receive_iov(core, &iov, 1);
 996}
 997
 998static inline bool
 999e1000e_rx_l3_cso_enabled(E1000ECore *core)
1000{
1001    return !!(core->mac[RXCSUM] & E1000_RXCSUM_IPOFLD);
1002}
1003
1004static inline bool
1005e1000e_rx_l4_cso_enabled(E1000ECore *core)
1006{
1007    return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD);
1008}
1009
1010static bool
1011e1000e_receive_filter(E1000ECore *core, const uint8_t *buf, int size)
1012{
1013    uint32_t rctl = core->mac[RCTL];
1014
1015    if (e1000x_is_vlan_packet(buf, core->vet) &&
1016        e1000x_vlan_rx_filter_enabled(core->mac)) {
1017        uint16_t vid = lduw_be_p(buf + 14);
1018        uint32_t vfta = ldl_le_p((uint32_t *)(core->mac + VFTA) +
1019                                 ((vid >> 5) & 0x7f));
1020        if ((vfta & (1 << (vid & 0x1f))) == 0) {
1021            trace_e1000e_rx_flt_vlan_mismatch(vid);
1022            return false;
1023        } else {
1024            trace_e1000e_rx_flt_vlan_match(vid);
1025        }
1026    }
1027
1028    switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
1029    case ETH_PKT_UCAST:
1030        if (rctl & E1000_RCTL_UPE) {
1031            return true; /* promiscuous ucast */
1032        }
1033        break;
1034
1035    case ETH_PKT_BCAST:
1036        if (rctl & E1000_RCTL_BAM) {
1037            return true; /* broadcast enabled */
1038        }
1039        break;
1040
1041    case ETH_PKT_MCAST:
1042        if (rctl & E1000_RCTL_MPE) {
1043            return true; /* promiscuous mcast */
1044        }
1045        break;
1046
1047    default:
1048        g_assert_not_reached();
1049    }
1050
1051    return e1000x_rx_group_filter(core->mac, buf);
1052}
1053
1054static inline void
1055e1000e_read_lgcy_rx_descr(E1000ECore *core, uint8_t *desc, hwaddr *buff_addr)
1056{
1057    struct e1000_rx_desc *d = (struct e1000_rx_desc *) desc;
1058    *buff_addr = le64_to_cpu(d->buffer_addr);
1059}
1060
1061static inline void
1062e1000e_read_ext_rx_descr(E1000ECore *core, uint8_t *desc, hwaddr *buff_addr)
1063{
1064    union e1000_rx_desc_extended *d = (union e1000_rx_desc_extended *) desc;
1065    *buff_addr = le64_to_cpu(d->read.buffer_addr);
1066}
1067
1068static inline void
1069e1000e_read_ps_rx_descr(E1000ECore *core, uint8_t *desc,
1070                        hwaddr (*buff_addr)[MAX_PS_BUFFERS])
1071{
1072    int i;
1073    union e1000_rx_desc_packet_split *d =
1074        (union e1000_rx_desc_packet_split *) desc;
1075
1076    for (i = 0; i < MAX_PS_BUFFERS; i++) {
1077        (*buff_addr)[i] = le64_to_cpu(d->read.buffer_addr[i]);
1078    }
1079
1080    trace_e1000e_rx_desc_ps_read((*buff_addr)[0], (*buff_addr)[1],
1081                                 (*buff_addr)[2], (*buff_addr)[3]);
1082}
1083
1084static inline void
1085e1000e_read_rx_descr(E1000ECore *core, uint8_t *desc,
1086                     hwaddr (*buff_addr)[MAX_PS_BUFFERS])
1087{
1088    if (e1000e_rx_use_legacy_descriptor(core)) {
1089        e1000e_read_lgcy_rx_descr(core, desc, &(*buff_addr)[0]);
1090        (*buff_addr)[1] = (*buff_addr)[2] = (*buff_addr)[3] = 0;
1091    } else {
1092        if (core->mac[RCTL] & E1000_RCTL_DTYP_PS) {
1093            e1000e_read_ps_rx_descr(core, desc, buff_addr);
1094        } else {
1095            e1000e_read_ext_rx_descr(core, desc, &(*buff_addr)[0]);
1096            (*buff_addr)[1] = (*buff_addr)[2] = (*buff_addr)[3] = 0;
1097        }
1098    }
1099}
1100
1101static void
1102e1000e_verify_csum_in_sw(E1000ECore *core,
1103                         struct NetRxPkt *pkt,
1104                         uint32_t *status_flags,
1105                         bool istcp, bool isudp)
1106{
1107    bool csum_valid;
1108    uint32_t csum_error;
1109
1110    if (e1000e_rx_l3_cso_enabled(core)) {
1111        if (!net_rx_pkt_validate_l3_csum(pkt, &csum_valid)) {
1112            trace_e1000e_rx_metadata_l3_csum_validation_failed();
1113        } else {
1114            csum_error = csum_valid ? 0 : E1000_RXDEXT_STATERR_IPE;
1115            *status_flags |= E1000_RXD_STAT_IPCS | csum_error;
1116        }
1117    } else {
1118        trace_e1000e_rx_metadata_l3_cso_disabled();
1119    }
1120
1121    if (!e1000e_rx_l4_cso_enabled(core)) {
1122        trace_e1000e_rx_metadata_l4_cso_disabled();
1123        return;
1124    }
1125
1126    if (!net_rx_pkt_validate_l4_csum(pkt, &csum_valid)) {
1127        trace_e1000e_rx_metadata_l4_csum_validation_failed();
1128        return;
1129    }
1130
1131    csum_error = csum_valid ? 0 : E1000_RXDEXT_STATERR_TCPE;
1132
1133    if (istcp) {
1134        *status_flags |= E1000_RXD_STAT_TCPCS |
1135                         csum_error;
1136    } else if (isudp) {
1137        *status_flags |= E1000_RXD_STAT_TCPCS |
1138                         E1000_RXD_STAT_UDPCS |
1139                         csum_error;
1140    }
1141}
1142
1143static inline bool
1144e1000e_is_tcp_ack(E1000ECore *core, struct NetRxPkt *rx_pkt)
1145{
1146    if (!net_rx_pkt_is_tcp_ack(rx_pkt)) {
1147        return false;
1148    }
1149
1150    if (core->mac[RFCTL] & E1000_RFCTL_ACK_DATA_DIS) {
1151        return !net_rx_pkt_has_tcp_data(rx_pkt);
1152    }
1153
1154    return true;
1155}
1156
1157static void
1158e1000e_build_rx_metadata(E1000ECore *core,
1159                         struct NetRxPkt *pkt,
1160                         bool is_eop,
1161                         const E1000E_RSSInfo *rss_info,
1162                         uint32_t *rss, uint32_t *mrq,
1163                         uint32_t *status_flags,
1164                         uint16_t *ip_id,
1165                         uint16_t *vlan_tag)
1166{
1167    struct virtio_net_hdr *vhdr;
1168    bool isip4, isip6, istcp, isudp;
1169    uint32_t pkt_type;
1170
1171    *status_flags = E1000_RXD_STAT_DD;
1172
1173    /* No additional metadata needed for non-EOP descriptors */
1174    if (!is_eop) {
1175        goto func_exit;
1176    }
1177
1178    *status_flags |= E1000_RXD_STAT_EOP;
1179
1180    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1181    trace_e1000e_rx_metadata_protocols(isip4, isip6, isudp, istcp);
1182
1183    /* VLAN state */
1184    if (net_rx_pkt_is_vlan_stripped(pkt)) {
1185        *status_flags |= E1000_RXD_STAT_VP;
1186        *vlan_tag = cpu_to_le16(net_rx_pkt_get_vlan_tag(pkt));
1187        trace_e1000e_rx_metadata_vlan(*vlan_tag);
1188    }
1189
1190    /* Packet parsing results */
1191    if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) {
1192        if (rss_info->enabled) {
1193            *rss = cpu_to_le32(rss_info->hash);
1194            *mrq = cpu_to_le32(rss_info->type | (rss_info->queue << 8));
1195            trace_e1000e_rx_metadata_rss(*rss, *mrq);
1196        }
1197    } else if (isip4) {
1198            *status_flags |= E1000_RXD_STAT_IPIDV;
1199            *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt));
1200            trace_e1000e_rx_metadata_ip_id(*ip_id);
1201    }
1202
1203    if (istcp && e1000e_is_tcp_ack(core, pkt)) {
1204        *status_flags |= E1000_RXD_STAT_ACK;
1205        trace_e1000e_rx_metadata_ack();
1206    }
1207
1208    if (isip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) {
1209        trace_e1000e_rx_metadata_ipv6_filtering_disabled();
1210        pkt_type = E1000_RXD_PKT_MAC;
1211    } else if (istcp || isudp) {
1212        pkt_type = isip4 ? E1000_RXD_PKT_IP4_XDP : E1000_RXD_PKT_IP6_XDP;
1213    } else if (isip4 || isip6) {
1214        pkt_type = isip4 ? E1000_RXD_PKT_IP4 : E1000_RXD_PKT_IP6;
1215    } else {
1216        pkt_type = E1000_RXD_PKT_MAC;
1217    }
1218
1219    *status_flags |= E1000_RXD_PKT_TYPE(pkt_type);
1220    trace_e1000e_rx_metadata_pkt_type(pkt_type);
1221
1222    /* RX CSO information */
1223    if (isip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) {
1224        trace_e1000e_rx_metadata_ipv6_sum_disabled();
1225        goto func_exit;
1226    }
1227
1228    if (!net_rx_pkt_has_virt_hdr(pkt)) {
1229        trace_e1000e_rx_metadata_no_virthdr();
1230        e1000e_verify_csum_in_sw(core, pkt, status_flags, istcp, isudp);
1231        goto func_exit;
1232    }
1233
1234    vhdr = net_rx_pkt_get_vhdr(pkt);
1235
1236    if (!(vhdr->flags & VIRTIO_NET_HDR_F_DATA_VALID) &&
1237        !(vhdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
1238        trace_e1000e_rx_metadata_virthdr_no_csum_info();
1239        e1000e_verify_csum_in_sw(core, pkt, status_flags, istcp, isudp);
1240        goto func_exit;
1241    }
1242
1243    if (e1000e_rx_l3_cso_enabled(core)) {
1244        *status_flags |= isip4 ? E1000_RXD_STAT_IPCS : 0;
1245    } else {
1246        trace_e1000e_rx_metadata_l3_cso_disabled();
1247    }
1248
1249    if (e1000e_rx_l4_cso_enabled(core)) {
1250        if (istcp) {
1251            *status_flags |= E1000_RXD_STAT_TCPCS;
1252        } else if (isudp) {
1253            *status_flags |= E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS;
1254        }
1255    } else {
1256        trace_e1000e_rx_metadata_l4_cso_disabled();
1257    }
1258
1259    trace_e1000e_rx_metadata_status_flags(*status_flags);
1260
1261func_exit:
1262    *status_flags = cpu_to_le32(*status_flags);
1263}
1264
1265static inline void
1266e1000e_write_lgcy_rx_descr(E1000ECore *core, uint8_t *desc,
1267                           struct NetRxPkt *pkt,
1268                           const E1000E_RSSInfo *rss_info,
1269                           uint16_t length)
1270{
1271    uint32_t status_flags, rss, mrq;
1272    uint16_t ip_id;
1273
1274    struct e1000_rx_desc *d = (struct e1000_rx_desc *) desc;
1275
1276    assert(!rss_info->enabled);
1277
1278    d->length = cpu_to_le16(length);
1279    d->csum = 0;
1280
1281    e1000e_build_rx_metadata(core, pkt, pkt != NULL,
1282                             rss_info,
1283                             &rss, &mrq,
1284                             &status_flags, &ip_id,
1285                             &d->special);
1286    d->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24);
1287    d->status = (uint8_t) le32_to_cpu(status_flags);
1288    d->special = 0;
1289}
1290
1291static inline void
1292e1000e_write_ext_rx_descr(E1000ECore *core, uint8_t *desc,
1293                          struct NetRxPkt *pkt,
1294                          const E1000E_RSSInfo *rss_info,
1295                          uint16_t length)
1296{
1297    union e1000_rx_desc_extended *d = (union e1000_rx_desc_extended *) desc;
1298
1299    memset(&d->wb, 0, sizeof(d->wb));
1300
1301    d->wb.upper.length = cpu_to_le16(length);
1302
1303    e1000e_build_rx_metadata(core, pkt, pkt != NULL,
1304                             rss_info,
1305                             &d->wb.lower.hi_dword.rss,
1306                             &d->wb.lower.mrq,
1307                             &d->wb.upper.status_error,
1308                             &d->wb.lower.hi_dword.csum_ip.ip_id,
1309                             &d->wb.upper.vlan);
1310}
1311
1312static inline void
1313e1000e_write_ps_rx_descr(E1000ECore *core, uint8_t *desc,
1314                         struct NetRxPkt *pkt,
1315                         const E1000E_RSSInfo *rss_info,
1316                         size_t ps_hdr_len,
1317                         uint16_t(*written)[MAX_PS_BUFFERS])
1318{
1319    int i;
1320    union e1000_rx_desc_packet_split *d =
1321        (union e1000_rx_desc_packet_split *) desc;
1322
1323    memset(&d->wb, 0, sizeof(d->wb));
1324
1325    d->wb.middle.length0 = cpu_to_le16((*written)[0]);
1326
1327    for (i = 0; i < PS_PAGE_BUFFERS; i++) {
1328        d->wb.upper.length[i] = cpu_to_le16((*written)[i + 1]);
1329    }
1330
1331    e1000e_build_rx_metadata(core, pkt, pkt != NULL,
1332                             rss_info,
1333                             &d->wb.lower.hi_dword.rss,
1334                             &d->wb.lower.mrq,
1335                             &d->wb.middle.status_error,
1336                             &d->wb.lower.hi_dword.csum_ip.ip_id,
1337                             &d->wb.middle.vlan);
1338
1339    d->wb.upper.header_status =
1340        cpu_to_le16(ps_hdr_len | (ps_hdr_len ? E1000_RXDPS_HDRSTAT_HDRSP : 0));
1341
1342    trace_e1000e_rx_desc_ps_write((*written)[0], (*written)[1],
1343                                  (*written)[2], (*written)[3]);
1344}
1345
1346static inline void
1347e1000e_write_rx_descr(E1000ECore *core, uint8_t *desc,
1348struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info,
1349    size_t ps_hdr_len, uint16_t(*written)[MAX_PS_BUFFERS])
1350{
1351    if (e1000e_rx_use_legacy_descriptor(core)) {
1352        assert(ps_hdr_len == 0);
1353        e1000e_write_lgcy_rx_descr(core, desc, pkt, rss_info, (*written)[0]);
1354    } else {
1355        if (core->mac[RCTL] & E1000_RCTL_DTYP_PS) {
1356            e1000e_write_ps_rx_descr(core, desc, pkt, rss_info,
1357                                      ps_hdr_len, written);
1358        } else {
1359            assert(ps_hdr_len == 0);
1360            e1000e_write_ext_rx_descr(core, desc, pkt, rss_info,
1361                                       (*written)[0]);
1362        }
1363    }
1364}
1365
1366typedef struct e1000e_ba_state_st {
1367    uint16_t written[MAX_PS_BUFFERS];
1368    uint8_t cur_idx;
1369} e1000e_ba_state;
1370
1371static inline void
1372e1000e_write_hdr_to_rx_buffers(E1000ECore *core,
1373                               hwaddr (*ba)[MAX_PS_BUFFERS],
1374                               e1000e_ba_state *bastate,
1375                               const char *data,
1376                               dma_addr_t data_len)
1377{
1378    assert(data_len <= core->rxbuf_sizes[0] - bastate->written[0]);
1379
1380    pci_dma_write(core->owner, (*ba)[0] + bastate->written[0], data, data_len);
1381    bastate->written[0] += data_len;
1382
1383    bastate->cur_idx = 1;
1384}
1385
1386static void
1387e1000e_write_to_rx_buffers(E1000ECore *core,
1388                           hwaddr (*ba)[MAX_PS_BUFFERS],
1389                           e1000e_ba_state *bastate,
1390                           const char *data,
1391                           dma_addr_t data_len)
1392{
1393    while (data_len > 0) {
1394        uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx];
1395        uint32_t cur_buf_bytes_left = cur_buf_len -
1396                                      bastate->written[bastate->cur_idx];
1397        uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left);
1398
1399        trace_e1000e_rx_desc_buff_write(bastate->cur_idx,
1400                                        (*ba)[bastate->cur_idx],
1401                                        bastate->written[bastate->cur_idx],
1402                                        data,
1403                                        bytes_to_write);
1404
1405        pci_dma_write(core->owner,
1406            (*ba)[bastate->cur_idx] + bastate->written[bastate->cur_idx],
1407            data, bytes_to_write);
1408
1409        bastate->written[bastate->cur_idx] += bytes_to_write;
1410        data += bytes_to_write;
1411        data_len -= bytes_to_write;
1412
1413        if (bastate->written[bastate->cur_idx] == cur_buf_len) {
1414            bastate->cur_idx++;
1415        }
1416
1417        assert(bastate->cur_idx < MAX_PS_BUFFERS);
1418    }
1419}
1420
1421static void
1422e1000e_update_rx_stats(E1000ECore *core,
1423                       size_t data_size,
1424                       size_t data_fcs_size)
1425{
1426    e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size);
1427
1428    switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
1429    case ETH_PKT_BCAST:
1430        e1000x_inc_reg_if_not_full(core->mac, BPRC);
1431        break;
1432
1433    case ETH_PKT_MCAST:
1434        e1000x_inc_reg_if_not_full(core->mac, MPRC);
1435        break;
1436
1437    default:
1438        break;
1439    }
1440}
1441
1442static inline bool
1443e1000e_rx_descr_threshold_hit(E1000ECore *core, const E1000E_RingInfo *rxi)
1444{
1445    return e1000e_ring_free_descr_num(core, rxi) ==
1446           e1000e_ring_len(core, rxi) >> core->rxbuf_min_shift;
1447}
1448
1449static bool
1450e1000e_do_ps(E1000ECore *core, struct NetRxPkt *pkt, size_t *hdr_len)
1451{
1452    bool isip4, isip6, isudp, istcp;
1453    bool fragment;
1454
1455    if (!e1000e_rx_use_ps_descriptor(core)) {
1456        return false;
1457    }
1458
1459    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1460
1461    if (isip4) {
1462        fragment = net_rx_pkt_get_ip4_info(pkt)->fragment;
1463    } else if (isip6) {
1464        fragment = net_rx_pkt_get_ip6_info(pkt)->fragment;
1465    } else {
1466        return false;
1467    }
1468
1469    if (fragment && (core->mac[RFCTL] & E1000_RFCTL_IPFRSP_DIS)) {
1470        return false;
1471    }
1472
1473    if (!fragment && (isudp || istcp)) {
1474        *hdr_len = net_rx_pkt_get_l5_hdr_offset(pkt);
1475    } else {
1476        *hdr_len = net_rx_pkt_get_l4_hdr_offset(pkt);
1477    }
1478
1479    if ((*hdr_len > core->rxbuf_sizes[0]) ||
1480        (*hdr_len > net_rx_pkt_get_total_len(pkt))) {
1481        return false;
1482    }
1483
1484    return true;
1485}
1486
1487static void
1488e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
1489                             const E1000E_RxRing *rxr,
1490                             const E1000E_RSSInfo *rss_info)
1491{
1492    PCIDevice *d = core->owner;
1493    dma_addr_t base;
1494    uint8_t desc[E1000_MAX_RX_DESC_LEN];
1495    size_t desc_size;
1496    size_t desc_offset = 0;
1497    size_t iov_ofs = 0;
1498
1499    struct iovec *iov = net_rx_pkt_get_iovec(pkt);
1500    size_t size = net_rx_pkt_get_total_len(pkt);
1501    size_t total_size = size + e1000x_fcs_len(core->mac);
1502    const E1000E_RingInfo *rxi;
1503    size_t ps_hdr_len = 0;
1504    bool do_ps = e1000e_do_ps(core, pkt, &ps_hdr_len);
1505    bool is_first = true;
1506
1507    rxi = rxr->i;
1508
1509    do {
1510        hwaddr ba[MAX_PS_BUFFERS];
1511        e1000e_ba_state bastate = { { 0 } };
1512        bool is_last = false;
1513
1514        desc_size = total_size - desc_offset;
1515
1516        if (desc_size > core->rx_desc_buf_size) {
1517            desc_size = core->rx_desc_buf_size;
1518        }
1519
1520        if (e1000e_ring_empty(core, rxi)) {
1521            return;
1522        }
1523
1524        base = e1000e_ring_head_descr(core, rxi);
1525
1526        pci_dma_read(d, base, &desc, core->rx_desc_len);
1527
1528        trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len);
1529
1530        e1000e_read_rx_descr(core, desc, &ba);
1531
1532        if (ba[0]) {
1533            if (desc_offset < size) {
1534                static const uint32_t fcs_pad;
1535                size_t iov_copy;
1536                size_t copy_size = size - desc_offset;
1537                if (copy_size > core->rx_desc_buf_size) {
1538                    copy_size = core->rx_desc_buf_size;
1539                }
1540
1541                /* For PS mode copy the packet header first */
1542                if (do_ps) {
1543                    if (is_first) {
1544                        size_t ps_hdr_copied = 0;
1545                        do {
1546                            iov_copy = MIN(ps_hdr_len - ps_hdr_copied,
1547                                           iov->iov_len - iov_ofs);
1548
1549                            e1000e_write_hdr_to_rx_buffers(core, &ba, &bastate,
1550                                                      iov->iov_base, iov_copy);
1551
1552                            copy_size -= iov_copy;
1553                            ps_hdr_copied += iov_copy;
1554
1555                            iov_ofs += iov_copy;
1556                            if (iov_ofs == iov->iov_len) {
1557                                iov++;
1558                                iov_ofs = 0;
1559                            }
1560                        } while (ps_hdr_copied < ps_hdr_len);
1561
1562                        is_first = false;
1563                    } else {
1564                        /* Leave buffer 0 of each descriptor except first */
1565                        /* empty as per spec 7.1.5.1                      */
1566                        e1000e_write_hdr_to_rx_buffers(core, &ba, &bastate,
1567                                                       NULL, 0);
1568                    }
1569                }
1570
1571                /* Copy packet payload */
1572                while (copy_size) {
1573                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
1574
1575                    e1000e_write_to_rx_buffers(core, &ba, &bastate,
1576                                            iov->iov_base + iov_ofs, iov_copy);
1577
1578                    copy_size -= iov_copy;
1579                    iov_ofs += iov_copy;
1580                    if (iov_ofs == iov->iov_len) {
1581                        iov++;
1582                        iov_ofs = 0;
1583                    }
1584                }
1585
1586                if (desc_offset + desc_size >= total_size) {
1587                    /* Simulate FCS checksum presence in the last descriptor */
1588                    e1000e_write_to_rx_buffers(core, &ba, &bastate,
1589                          (const char *) &fcs_pad, e1000x_fcs_len(core->mac));
1590                }
1591            }
1592        } else { /* as per intel docs; skip descriptors with null buf addr */
1593            trace_e1000e_rx_null_descriptor();
1594        }
1595        desc_offset += desc_size;
1596        if (desc_offset >= total_size) {
1597            is_last = true;
1598        }
1599
1600        e1000e_write_rx_descr(core, desc, is_last ? core->rx_pkt : NULL,
1601                           rss_info, do_ps ? ps_hdr_len : 0, &bastate.written);
1602        pci_dma_write(d, base, &desc, core->rx_desc_len);
1603
1604        e1000e_ring_advance(core, rxi,
1605                            core->rx_desc_len / E1000_MIN_RX_DESC_LEN);
1606
1607    } while (desc_offset < total_size);
1608
1609    e1000e_update_rx_stats(core, size, total_size);
1610}
1611
1612static inline void
1613e1000e_rx_fix_l4_csum(E1000ECore *core, struct NetRxPkt *pkt)
1614{
1615    if (net_rx_pkt_has_virt_hdr(pkt)) {
1616        struct virtio_net_hdr *vhdr = net_rx_pkt_get_vhdr(pkt);
1617
1618        if (vhdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1619            net_rx_pkt_fix_l4_csum(pkt);
1620        }
1621    }
1622}
1623
1624ssize_t
1625e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt)
1626{
1627    static const int maximum_ethernet_hdr_len = (14 + 4);
1628    /* Min. octets in an ethernet frame sans FCS */
1629    static const int min_buf_size = 60;
1630
1631    uint32_t n = 0;
1632    uint8_t min_buf[min_buf_size];
1633    struct iovec min_iov;
1634    uint8_t *filter_buf;
1635    size_t size, orig_size;
1636    size_t iov_ofs = 0;
1637    E1000E_RxRing rxr;
1638    E1000E_RSSInfo rss_info;
1639    size_t total_size;
1640    ssize_t retval;
1641    bool rdmts_hit;
1642
1643    trace_e1000e_rx_receive_iov(iovcnt);
1644
1645    if (!e1000x_hw_rx_enabled(core->mac)) {
1646        return -1;
1647    }
1648
1649    /* Pull virtio header in */
1650    if (core->has_vnet) {
1651        net_rx_pkt_set_vhdr_iovec(core->rx_pkt, iov, iovcnt);
1652        iov_ofs = sizeof(struct virtio_net_hdr);
1653    }
1654
1655    filter_buf = iov->iov_base + iov_ofs;
1656    orig_size = iov_size(iov, iovcnt);
1657    size = orig_size - iov_ofs;
1658
1659    /* Pad to minimum Ethernet frame length */
1660    if (size < sizeof(min_buf)) {
1661        iov_to_buf(iov, iovcnt, iov_ofs, min_buf, size);
1662        memset(&min_buf[size], 0, sizeof(min_buf) - size);
1663        e1000x_inc_reg_if_not_full(core->mac, RUC);
1664        min_iov.iov_base = filter_buf = min_buf;
1665        min_iov.iov_len = size = sizeof(min_buf);
1666        iovcnt = 1;
1667        iov = &min_iov;
1668        iov_ofs = 0;
1669    } else if (iov->iov_len < maximum_ethernet_hdr_len) {
1670        /* This is very unlikely, but may happen. */
1671        iov_to_buf(iov, iovcnt, iov_ofs, min_buf, maximum_ethernet_hdr_len);
1672        filter_buf = min_buf;
1673    }
1674
1675    /* Discard oversized packets if !LPE and !SBP. */
1676    if (e1000x_is_oversized(core->mac, size)) {
1677        return orig_size;
1678    }
1679
1680    net_rx_pkt_set_packet_type(core->rx_pkt,
1681        get_eth_packet_type(PKT_GET_ETH_HDR(filter_buf)));
1682
1683    if (!e1000e_receive_filter(core, filter_buf, size)) {
1684        trace_e1000e_rx_flt_dropped();
1685        return orig_size;
1686    }
1687
1688    net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
1689                               e1000x_vlan_enabled(core->mac), core->vet);
1690
1691    e1000e_rss_parse_packet(core, core->rx_pkt, &rss_info);
1692    e1000e_rx_ring_init(core, &rxr, rss_info.queue);
1693
1694    trace_e1000e_rx_rss_dispatched_to_queue(rxr.i->idx);
1695
1696    total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
1697        e1000x_fcs_len(core->mac);
1698
1699    if (e1000e_has_rxbufs(core, rxr.i, total_size)) {
1700        e1000e_rx_fix_l4_csum(core, core->rx_pkt);
1701
1702        e1000e_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info);
1703
1704        retval = orig_size;
1705
1706        /* Perform small receive detection (RSRPD) */
1707        if (total_size < core->mac[RSRPD]) {
1708            n |= E1000_ICS_SRPD;
1709        }
1710
1711        /* Perform ACK receive detection */
1712        if  (!(core->mac[RFCTL] & E1000_RFCTL_ACK_DIS) &&
1713             (e1000e_is_tcp_ack(core, core->rx_pkt))) {
1714            n |= E1000_ICS_ACK;
1715        }
1716
1717        /* Check if receive descriptor minimum threshold hit */
1718        rdmts_hit = e1000e_rx_descr_threshold_hit(core, rxr.i);
1719        n |= e1000e_rx_wb_interrupt_cause(core, rxr.i->idx, rdmts_hit);
1720
1721        trace_e1000e_rx_written_to_guest(n);
1722    } else {
1723        n |= E1000_ICS_RXO;
1724        retval = 0;
1725
1726        trace_e1000e_rx_not_written_to_guest(n);
1727    }
1728
1729    if (!e1000e_intrmgr_delay_rx_causes(core, &n)) {
1730        trace_e1000e_rx_interrupt_set(n);
1731        e1000e_set_interrupt_cause(core, n);
1732    } else {
1733        trace_e1000e_rx_interrupt_delayed(n);
1734    }
1735
1736    return retval;
1737}
1738
1739static inline bool
1740e1000e_have_autoneg(E1000ECore *core)
1741{
1742    return core->phy[0][PHY_CTRL] & MII_CR_AUTO_NEG_EN;
1743}
1744
1745static void e1000e_update_flowctl_status(E1000ECore *core)
1746{
1747    if (e1000e_have_autoneg(core) &&
1748        core->phy[0][PHY_STATUS] & MII_SR_AUTONEG_COMPLETE) {
1749        trace_e1000e_link_autoneg_flowctl(true);
1750        core->mac[CTRL] |= E1000_CTRL_TFCE | E1000_CTRL_RFCE;
1751    } else {
1752        trace_e1000e_link_autoneg_flowctl(false);
1753    }
1754}
1755
1756static inline void
1757e1000e_link_down(E1000ECore *core)
1758{
1759    e1000x_update_regs_on_link_down(core->mac, core->phy[0]);
1760    e1000e_update_flowctl_status(core);
1761}
1762
1763static inline void
1764e1000e_set_phy_ctrl(E1000ECore *core, int index, uint16_t val)
1765{
1766    /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
1767    core->phy[0][PHY_CTRL] = val & ~(0x3f |
1768                                     MII_CR_RESET |
1769                                     MII_CR_RESTART_AUTO_NEG);
1770
1771    if ((val & MII_CR_RESTART_AUTO_NEG) &&
1772        e1000e_have_autoneg(core)) {
1773        e1000x_restart_autoneg(core->mac, core->phy[0], core->autoneg_timer);
1774    }
1775}
1776
1777static void
1778e1000e_set_phy_oem_bits(E1000ECore *core, int index, uint16_t val)
1779{
1780    core->phy[0][PHY_OEM_BITS] = val & ~BIT(10);
1781
1782    if (val & BIT(10)) {
1783        e1000x_restart_autoneg(core->mac, core->phy[0], core->autoneg_timer);
1784    }
1785}
1786
1787static void
1788e1000e_set_phy_page(E1000ECore *core, int index, uint16_t val)
1789{
1790    core->phy[0][PHY_PAGE] = val & PHY_PAGE_RW_MASK;
1791}
1792
1793void
1794e1000e_core_set_link_status(E1000ECore *core)
1795{
1796    NetClientState *nc = qemu_get_queue(core->owner_nic);
1797    uint32_t old_status = core->mac[STATUS];
1798
1799    trace_e1000e_link_status_changed(nc->link_down ? false : true);
1800
1801    if (nc->link_down) {
1802        e1000x_update_regs_on_link_down(core->mac, core->phy[0]);
1803    } else {
1804        if (e1000e_have_autoneg(core) &&
1805            !(core->phy[0][PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1806            e1000x_restart_autoneg(core->mac, core->phy[0],
1807                                   core->autoneg_timer);
1808        } else {
1809            e1000x_update_regs_on_link_up(core->mac, core->phy[0]);
1810            e1000e_start_recv(core);
1811        }
1812    }
1813
1814    if (core->mac[STATUS] != old_status) {
1815        e1000e_set_interrupt_cause(core, E1000_ICR_LSC);
1816    }
1817}
1818
1819static void
1820e1000e_set_ctrl(E1000ECore *core, int index, uint32_t val)
1821{
1822    trace_e1000e_core_ctrl_write(index, val);
1823
1824    /* RST is self clearing */
1825    core->mac[CTRL] = val & ~E1000_CTRL_RST;
1826    core->mac[CTRL_DUP] = core->mac[CTRL];
1827
1828    trace_e1000e_link_set_params(
1829        !!(val & E1000_CTRL_ASDE),
1830        (val & E1000_CTRL_SPD_SEL) >> E1000_CTRL_SPD_SHIFT,
1831        !!(val & E1000_CTRL_FRCSPD),
1832        !!(val & E1000_CTRL_FRCDPX),
1833        !!(val & E1000_CTRL_RFCE),
1834        !!(val & E1000_CTRL_TFCE));
1835
1836    if (val & E1000_CTRL_RST) {
1837        trace_e1000e_core_ctrl_sw_reset();
1838        e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);
1839    }
1840
1841    if (val & E1000_CTRL_PHY_RST) {
1842        trace_e1000e_core_ctrl_phy_reset();
1843        core->mac[STATUS] |= E1000_STATUS_PHYRA;
1844    }
1845}
1846
1847static void
1848e1000e_set_rfctl(E1000ECore *core, int index, uint32_t val)
1849{
1850    trace_e1000e_rx_set_rfctl(val);
1851
1852    if (!(val & E1000_RFCTL_ISCSI_DIS)) {
1853        trace_e1000e_wrn_iscsi_filtering_not_supported();
1854    }
1855
1856    if (!(val & E1000_RFCTL_NFSW_DIS)) {
1857        trace_e1000e_wrn_nfsw_filtering_not_supported();
1858    }
1859
1860    if (!(val & E1000_RFCTL_NFSR_DIS)) {
1861        trace_e1000e_wrn_nfsr_filtering_not_supported();
1862    }
1863
1864    core->mac[RFCTL] = val;
1865}
1866
1867static void
1868e1000e_calc_per_desc_buf_size(E1000ECore *core)
1869{
1870    int i;
1871    core->rx_desc_buf_size = 0;
1872
1873    for (i = 0; i < ARRAY_SIZE(core->rxbuf_sizes); i++) {
1874        core->rx_desc_buf_size += core->rxbuf_sizes[i];
1875    }
1876}
1877
1878static void
1879e1000e_parse_rxbufsize(E1000ECore *core)
1880{
1881    uint32_t rctl = core->mac[RCTL];
1882
1883    memset(core->rxbuf_sizes, 0, sizeof(core->rxbuf_sizes));
1884
1885    if (rctl & E1000_RCTL_DTYP_MASK) {
1886        uint32_t bsize;
1887
1888        bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE0_MASK;
1889        core->rxbuf_sizes[0] = (bsize >> E1000_PSRCTL_BSIZE0_SHIFT) * 128;
1890
1891        bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE1_MASK;
1892        core->rxbuf_sizes[1] = (bsize >> E1000_PSRCTL_BSIZE1_SHIFT) * 1024;
1893
1894        bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE2_MASK;
1895        core->rxbuf_sizes[2] = (bsize >> E1000_PSRCTL_BSIZE2_SHIFT) * 1024;
1896
1897        bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE3_MASK;
1898        core->rxbuf_sizes[3] = (bsize >> E1000_PSRCTL_BSIZE3_SHIFT) * 1024;
1899    } else if (rctl & E1000_RCTL_FLXBUF_MASK) {
1900        int flxbuf = rctl & E1000_RCTL_FLXBUF_MASK;
1901        core->rxbuf_sizes[0] = (flxbuf >> E1000_RCTL_FLXBUF_SHIFT) * 1024;
1902    } else {
1903        core->rxbuf_sizes[0] = e1000x_rxbufsize(rctl);
1904    }
1905
1906    trace_e1000e_rx_desc_buff_sizes(core->rxbuf_sizes[0], core->rxbuf_sizes[1],
1907                                    core->rxbuf_sizes[2], core->rxbuf_sizes[3]);
1908
1909    e1000e_calc_per_desc_buf_size(core);
1910}
1911
1912static void
1913e1000e_calc_rxdesclen(E1000ECore *core)
1914{
1915    if (e1000e_rx_use_legacy_descriptor(core)) {
1916        core->rx_desc_len = sizeof(struct e1000_rx_desc);
1917    } else {
1918        if (core->mac[RCTL] & E1000_RCTL_DTYP_PS) {
1919            core->rx_desc_len = sizeof(union e1000_rx_desc_packet_split);
1920        } else {
1921            core->rx_desc_len = sizeof(union e1000_rx_desc_extended);
1922        }
1923    }
1924    trace_e1000e_rx_desc_len(core->rx_desc_len);
1925}
1926
1927static void
1928e1000e_set_rx_control(E1000ECore *core, int index, uint32_t val)
1929{
1930    core->mac[RCTL] = val;
1931    trace_e1000e_rx_set_rctl(core->mac[RCTL]);
1932
1933    if (val & E1000_RCTL_EN) {
1934        e1000e_parse_rxbufsize(core);
1935        e1000e_calc_rxdesclen(core);
1936        core->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1 +
1937                                E1000_RING_DESC_LEN_SHIFT;
1938
1939        e1000e_start_recv(core);
1940    }
1941}
1942
1943static
1944void(*e1000e_phyreg_writeops[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE])
1945(E1000ECore *, int, uint16_t) = {
1946    [0] = {
1947        [PHY_CTRL]     = e1000e_set_phy_ctrl,
1948        [PHY_PAGE]     = e1000e_set_phy_page,
1949        [PHY_OEM_BITS] = e1000e_set_phy_oem_bits
1950    }
1951};
1952
1953static inline void
1954e1000e_clear_ims_bits(E1000ECore *core, uint32_t bits)
1955{
1956    trace_e1000e_irq_clear_ims(bits, core->mac[IMS], core->mac[IMS] & ~bits);
1957    core->mac[IMS] &= ~bits;
1958}
1959
1960static inline bool
1961e1000e_postpone_interrupt(bool *interrupt_pending,
1962                           E1000IntrDelayTimer *timer)
1963{
1964    if (timer->running) {
1965        trace_e1000e_irq_postponed_by_xitr(timer->delay_reg << 2);
1966
1967        *interrupt_pending = true;
1968        return true;
1969    }
1970
1971    if (timer->core->mac[timer->delay_reg] != 0) {
1972        e1000e_intrmgr_rearm_timer(timer);
1973    }
1974
1975    return false;
1976}
1977
1978static inline bool
1979e1000e_itr_should_postpone(E1000ECore *core)
1980{
1981    return e1000e_postpone_interrupt(&core->itr_intr_pending, &core->itr);
1982}
1983
1984static inline bool
1985e1000e_eitr_should_postpone(E1000ECore *core, int idx)
1986{
1987    return e1000e_postpone_interrupt(&core->eitr_intr_pending[idx],
1988                                     &core->eitr[idx]);
1989}
1990
1991static void
1992e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg)
1993{
1994    uint32_t effective_eiac;
1995
1996    if (E1000_IVAR_ENTRY_VALID(int_cfg)) {
1997        uint32_t vec = E1000_IVAR_ENTRY_VEC(int_cfg);
1998        if (vec < E1000E_MSIX_VEC_NUM) {
1999            if (!e1000e_eitr_should_postpone(core, vec)) {
2000                trace_e1000e_irq_msix_notify_vec(vec);
2001                msix_notify(core->owner, vec);
2002            }
2003        } else {
2004            trace_e1000e_wrn_msix_vec_wrong(cause, int_cfg);
2005        }
2006    } else {
2007        trace_e1000e_wrn_msix_invalid(cause, int_cfg);
2008    }
2009
2010    if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_EIAME) {
2011        trace_e1000e_irq_iam_clear_eiame(core->mac[IAM], cause);
2012        core->mac[IAM] &= ~cause;
2013    }
2014
2015    trace_e1000e_irq_icr_clear_eiac(core->mac[ICR], core->mac[EIAC]);
2016
2017    effective_eiac = core->mac[EIAC] & cause;
2018
2019    core->mac[ICR] &= ~effective_eiac;
2020    core->msi_causes_pending &= ~effective_eiac;
2021
2022    if (!(core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
2023        core->mac[IMS] &= ~effective_eiac;
2024    }
2025}
2026
2027static void
2028e1000e_msix_notify(E1000ECore *core, uint32_t causes)
2029{
2030    if (causes & E1000_ICR_RXQ0) {
2031        e1000e_msix_notify_one(core, E1000_ICR_RXQ0,
2032                               E1000_IVAR_RXQ0(core->mac[IVAR]));
2033    }
2034
2035    if (causes & E1000_ICR_RXQ1) {
2036        e1000e_msix_notify_one(core, E1000_ICR_RXQ1,
2037                               E1000_IVAR_RXQ1(core->mac[IVAR]));
2038    }
2039
2040    if (causes & E1000_ICR_TXQ0) {
2041        e1000e_msix_notify_one(core, E1000_ICR_TXQ0,
2042                               E1000_IVAR_TXQ0(core->mac[IVAR]));
2043    }
2044
2045    if (causes & E1000_ICR_TXQ1) {
2046        e1000e_msix_notify_one(core, E1000_ICR_TXQ1,
2047                               E1000_IVAR_TXQ1(core->mac[IVAR]));
2048    }
2049
2050    if (causes & E1000_ICR_OTHER) {
2051        e1000e_msix_notify_one(core, E1000_ICR_OTHER,
2052                               E1000_IVAR_OTHER(core->mac[IVAR]));
2053    }
2054}
2055
2056static void
2057e1000e_msix_clear_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg)
2058{
2059    if (E1000_IVAR_ENTRY_VALID(int_cfg)) {
2060        uint32_t vec = E1000_IVAR_ENTRY_VEC(int_cfg);
2061        if (vec < E1000E_MSIX_VEC_NUM) {
2062            trace_e1000e_irq_msix_pending_clearing(cause, int_cfg, vec);
2063            msix_clr_pending(core->owner, vec);
2064        } else {
2065            trace_e1000e_wrn_msix_vec_wrong(cause, int_cfg);
2066        }
2067    } else {
2068        trace_e1000e_wrn_msix_invalid(cause, int_cfg);
2069    }
2070}
2071
2072static void
2073e1000e_msix_clear(E1000ECore *core, uint32_t causes)
2074{
2075    if (causes & E1000_ICR_RXQ0) {
2076        e1000e_msix_clear_one(core, E1000_ICR_RXQ0,
2077                              E1000_IVAR_RXQ0(core->mac[IVAR]));
2078    }
2079
2080    if (causes & E1000_ICR_RXQ1) {
2081        e1000e_msix_clear_one(core, E1000_ICR_RXQ1,
2082                              E1000_IVAR_RXQ1(core->mac[IVAR]));
2083    }
2084
2085    if (causes & E1000_ICR_TXQ0) {
2086        e1000e_msix_clear_one(core, E1000_ICR_TXQ0,
2087                              E1000_IVAR_TXQ0(core->mac[IVAR]));
2088    }
2089
2090    if (causes & E1000_ICR_TXQ1) {
2091        e1000e_msix_clear_one(core, E1000_ICR_TXQ1,
2092                              E1000_IVAR_TXQ1(core->mac[IVAR]));
2093    }
2094
2095    if (causes & E1000_ICR_OTHER) {
2096        e1000e_msix_clear_one(core, E1000_ICR_OTHER,
2097                              E1000_IVAR_OTHER(core->mac[IVAR]));
2098    }
2099}
2100
2101static inline void
2102e1000e_fix_icr_asserted(E1000ECore *core)
2103{
2104    core->mac[ICR] &= ~E1000_ICR_ASSERTED;
2105    if (core->mac[ICR]) {
2106        core->mac[ICR] |= E1000_ICR_ASSERTED;
2107    }
2108
2109    trace_e1000e_irq_fix_icr_asserted(core->mac[ICR]);
2110}
2111
2112static void
2113e1000e_send_msi(E1000ECore *core, bool msix)
2114{
2115    uint32_t causes = core->mac[ICR] & core->mac[IMS] & ~E1000_ICR_ASSERTED;
2116
2117    core->msi_causes_pending &= causes;
2118    causes ^= core->msi_causes_pending;
2119    if (causes == 0) {
2120        return;
2121    }
2122    core->msi_causes_pending |= causes;
2123
2124    if (msix) {
2125        e1000e_msix_notify(core, causes);
2126    } else {
2127        if (!e1000e_itr_should_postpone(core)) {
2128            trace_e1000e_irq_msi_notify(causes);
2129            msi_notify(core->owner, 0);
2130        }
2131    }
2132}
2133
2134static void
2135e1000e_update_interrupt_state(E1000ECore *core)
2136{
2137    bool interrupts_pending;
2138    bool is_msix = msix_enabled(core->owner);
2139
2140    /* Set ICR[OTHER] for MSI-X */
2141    if (is_msix) {
2142        if (core->mac[ICR] & E1000_ICR_OTHER_CAUSES) {
2143            core->mac[ICR] |= E1000_ICR_OTHER;
2144            trace_e1000e_irq_add_msi_other(core->mac[ICR]);
2145        }
2146    }
2147
2148    e1000e_fix_icr_asserted(core);
2149
2150    /*
2151     * Make sure ICR and ICS registers have the same value.
2152     * The spec says that the ICS register is write-only.  However in practice,
2153     * on real hardware ICS is readable, and for reads it has the same value as
2154     * ICR (except that ICS does not have the clear on read behaviour of ICR).
2155     *
2156     * The VxWorks PRO/1000 driver uses this behaviour.
2157     */
2158    core->mac[ICS] = core->mac[ICR];
2159
2160    interrupts_pending = (core->mac[IMS] & core->mac[ICR]) ? true : false;
2161    if (!interrupts_pending) {
2162        core->msi_causes_pending = 0;
2163    }
2164
2165    trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS],
2166                                        core->mac[ICR], core->mac[IMS]);
2167
2168    if (is_msix || msi_enabled(core->owner)) {
2169        if (interrupts_pending) {
2170            e1000e_send_msi(core, is_msix);
2171        }
2172    } else {
2173        if (interrupts_pending) {
2174            if (!e1000e_itr_should_postpone(core)) {
2175                e1000e_raise_legacy_irq(core);
2176            }
2177        } else {
2178            e1000e_lower_legacy_irq(core);
2179        }
2180    }
2181}
2182
2183static void
2184e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val)
2185{
2186    trace_e1000e_irq_set_cause_entry(val, core->mac[ICR]);
2187
2188    val |= e1000e_intmgr_collect_delayed_causes(core);
2189    core->mac[ICR] |= val;
2190
2191    trace_e1000e_irq_set_cause_exit(val, core->mac[ICR]);
2192
2193    e1000e_update_interrupt_state(core);
2194}
2195
2196static inline void
2197e1000e_autoneg_timer(void *opaque)
2198{
2199    E1000ECore *core = opaque;
2200    if (!qemu_get_queue(core->owner_nic)->link_down) {
2201        e1000x_update_regs_on_autoneg_done(core->mac, core->phy[0]);
2202        e1000e_start_recv(core);
2203
2204        e1000e_update_flowctl_status(core);
2205        /* signal link status change to the guest */
2206        e1000e_set_interrupt_cause(core, E1000_ICR_LSC);
2207    }
2208}
2209
2210static inline uint16_t
2211e1000e_get_reg_index_with_offset(const uint16_t *mac_reg_access, hwaddr addr)
2212{
2213    uint16_t index = (addr & 0x1ffff) >> 2;
2214    return index + (mac_reg_access[index] & 0xfffe);
2215}
2216
2217static const char e1000e_phy_regcap[E1000E_PHY_PAGES][0x20] = {
2218    [0] = {
2219        [PHY_CTRL]          = PHY_ANYPAGE | PHY_RW,
2220        [PHY_STATUS]        = PHY_ANYPAGE | PHY_R,
2221        [PHY_ID1]           = PHY_ANYPAGE | PHY_R,
2222        [PHY_ID2]           = PHY_ANYPAGE | PHY_R,
2223        [PHY_AUTONEG_ADV]   = PHY_ANYPAGE | PHY_RW,
2224        [PHY_LP_ABILITY]    = PHY_ANYPAGE | PHY_R,
2225        [PHY_AUTONEG_EXP]   = PHY_ANYPAGE | PHY_R,
2226        [PHY_NEXT_PAGE_TX]  = PHY_ANYPAGE | PHY_RW,
2227        [PHY_LP_NEXT_PAGE]  = PHY_ANYPAGE | PHY_R,
2228        [PHY_1000T_CTRL]    = PHY_ANYPAGE | PHY_RW,
2229        [PHY_1000T_STATUS]  = PHY_ANYPAGE | PHY_R,
2230        [PHY_EXT_STATUS]    = PHY_ANYPAGE | PHY_R,
2231        [PHY_PAGE]          = PHY_ANYPAGE | PHY_RW,
2232
2233        [PHY_COPPER_CTRL1]      = PHY_RW,
2234        [PHY_COPPER_STAT1]      = PHY_R,
2235        [PHY_COPPER_CTRL3]      = PHY_RW,
2236        [PHY_RX_ERR_CNTR]       = PHY_R,
2237        [PHY_OEM_BITS]          = PHY_RW,
2238        [PHY_BIAS_1]            = PHY_RW,
2239        [PHY_BIAS_2]            = PHY_RW,
2240        [PHY_COPPER_INT_ENABLE] = PHY_RW,
2241        [PHY_COPPER_STAT2]      = PHY_R,
2242        [PHY_COPPER_CTRL2]      = PHY_RW
2243    },
2244    [2] = {
2245        [PHY_MAC_CTRL1]         = PHY_RW,
2246        [PHY_MAC_INT_ENABLE]    = PHY_RW,
2247        [PHY_MAC_STAT]          = PHY_R,
2248        [PHY_MAC_CTRL2]         = PHY_RW
2249    },
2250    [3] = {
2251        [PHY_LED_03_FUNC_CTRL1] = PHY_RW,
2252        [PHY_LED_03_POL_CTRL]   = PHY_RW,
2253        [PHY_LED_TIMER_CTRL]    = PHY_RW,
2254        [PHY_LED_45_CTRL]       = PHY_RW
2255    },
2256    [5] = {
2257        [PHY_1000T_SKEW]        = PHY_R,
2258        [PHY_1000T_SWAP]        = PHY_R
2259    },
2260    [6] = {
2261        [PHY_CRC_COUNTERS]      = PHY_R
2262    }
2263};
2264
2265static bool
2266e1000e_phy_reg_check_cap(E1000ECore *core, uint32_t addr,
2267                         char cap, uint8_t *page)
2268{
2269    *page =
2270        (e1000e_phy_regcap[0][addr] & PHY_ANYPAGE) ? 0
2271                                                    : core->phy[0][PHY_PAGE];
2272
2273    if (*page >= E1000E_PHY_PAGES) {
2274        return false;
2275    }
2276
2277    return e1000e_phy_regcap[*page][addr] & cap;
2278}
2279
2280static void
2281e1000e_phy_reg_write(E1000ECore *core, uint8_t page,
2282                     uint32_t addr, uint16_t data)
2283{
2284    assert(page < E1000E_PHY_PAGES);
2285    assert(addr < E1000E_PHY_PAGE_SIZE);
2286
2287    if (e1000e_phyreg_writeops[page][addr]) {
2288        e1000e_phyreg_writeops[page][addr](core, addr, data);
2289    } else {
2290        core->phy[page][addr] = data;
2291    }
2292}
2293
2294static void
2295e1000e_set_mdic(E1000ECore *core, int index, uint32_t val)
2296{
2297    uint32_t data = val & E1000_MDIC_DATA_MASK;
2298    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
2299    uint8_t page;
2300
2301    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) { /* phy # */
2302        val = core->mac[MDIC] | E1000_MDIC_ERROR;
2303    } else if (val & E1000_MDIC_OP_READ) {
2304        if (!e1000e_phy_reg_check_cap(core, addr, PHY_R, &page)) {
2305            trace_e1000e_core_mdic_read_unhandled(page, addr);
2306            val |= E1000_MDIC_ERROR;
2307        } else {
2308            val = (val ^ data) | core->phy[page][addr];
2309            trace_e1000e_core_mdic_read(page, addr, val);
2310        }
2311    } else if (val & E1000_MDIC_OP_WRITE) {
2312        if (!e1000e_phy_reg_check_cap(core, addr, PHY_W, &page)) {
2313            trace_e1000e_core_mdic_write_unhandled(page, addr);
2314            val |= E1000_MDIC_ERROR;
2315        } else {
2316            trace_e1000e_core_mdic_write(page, addr, data);
2317            e1000e_phy_reg_write(core, page, addr, data);
2318        }
2319    }
2320    core->mac[MDIC] = val | E1000_MDIC_READY;
2321
2322    if (val & E1000_MDIC_INT_EN) {
2323        e1000e_set_interrupt_cause(core, E1000_ICR_MDAC);
2324    }
2325}
2326
2327static void
2328e1000e_set_rdt(E1000ECore *core, int index, uint32_t val)
2329{
2330    core->mac[index] = val & 0xffff;
2331    trace_e1000e_rx_set_rdt(e1000e_mq_queue_idx(RDT0, index), val);
2332    e1000e_start_recv(core);
2333}
2334
2335static void
2336e1000e_set_status(E1000ECore *core, int index, uint32_t val)
2337{
2338    if ((val & E1000_STATUS_PHYRA) == 0) {
2339        core->mac[index] &= ~E1000_STATUS_PHYRA;
2340    }
2341}
2342
2343static void
2344e1000e_set_ctrlext(E1000ECore *core, int index, uint32_t val)
2345{
2346    trace_e1000e_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
2347                                     !!(val & E1000_CTRL_EXT_SPD_BYPS));
2348
2349    /* Zero self-clearing bits */
2350    val &= ~(E1000_CTRL_EXT_ASDCHK | E1000_CTRL_EXT_EE_RST);
2351    core->mac[CTRL_EXT] = val;
2352}
2353
2354static void
2355e1000e_set_pbaclr(E1000ECore *core, int index, uint32_t val)
2356{
2357    int i;
2358
2359    core->mac[PBACLR] = val & E1000_PBACLR_VALID_MASK;
2360
2361    if (!msix_enabled(core->owner)) {
2362        return;
2363    }
2364
2365    for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) {
2366        if (core->mac[PBACLR] & BIT(i)) {
2367            msix_clr_pending(core->owner, i);
2368        }
2369    }
2370}
2371
2372static void
2373e1000e_set_fcrth(E1000ECore *core, int index, uint32_t val)
2374{
2375    core->mac[FCRTH] = val & 0xFFF8;
2376}
2377
2378static void
2379e1000e_set_fcrtl(E1000ECore *core, int index, uint32_t val)
2380{
2381    core->mac[FCRTL] = val & 0x8000FFF8;
2382}
2383
2384static inline void
2385e1000e_set_16bit(E1000ECore *core, int index, uint32_t val)
2386{
2387    core->mac[index] = val & 0xffff;
2388}
2389
2390static void
2391e1000e_set_12bit(E1000ECore *core, int index, uint32_t val)
2392{
2393    core->mac[index] = val & 0xfff;
2394}
2395
2396static void
2397e1000e_set_vet(E1000ECore *core, int index, uint32_t val)
2398{
2399    core->mac[VET] = val & 0xffff;
2400    core->vet = le16_to_cpu(core->mac[VET]);
2401    trace_e1000e_vlan_vet(core->vet);
2402}
2403
2404static void
2405e1000e_set_dlen(E1000ECore *core, int index, uint32_t val)
2406{
2407    core->mac[index] = val & E1000_XDLEN_MASK;
2408}
2409
2410static void
2411e1000e_set_dbal(E1000ECore *core, int index, uint32_t val)
2412{
2413    core->mac[index] = val & E1000_XDBAL_MASK;
2414}
2415
2416static void
2417e1000e_set_tctl(E1000ECore *core, int index, uint32_t val)
2418{
2419    E1000E_TxRing txr;
2420    core->mac[index] = val;
2421
2422    if (core->mac[TARC0] & E1000_TARC_ENABLE) {
2423        e1000e_tx_ring_init(core, &txr, 0);
2424        e1000e_start_xmit(core, &txr);
2425    }
2426
2427    if (core->mac[TARC1] & E1000_TARC_ENABLE) {
2428        e1000e_tx_ring_init(core, &txr, 1);
2429        e1000e_start_xmit(core, &txr);
2430    }
2431}
2432
2433static void
2434e1000e_set_tdt(E1000ECore *core, int index, uint32_t val)
2435{
2436    E1000E_TxRing txr;
2437    int qidx = e1000e_mq_queue_idx(TDT, index);
2438    uint32_t tarc_reg = (qidx == 0) ? TARC0 : TARC1;
2439
2440    core->mac[index] = val & 0xffff;
2441
2442    if (core->mac[tarc_reg] & E1000_TARC_ENABLE) {
2443        e1000e_tx_ring_init(core, &txr, qidx);
2444        e1000e_start_xmit(core, &txr);
2445    }
2446}
2447
2448static void
2449e1000e_set_ics(E1000ECore *core, int index, uint32_t val)
2450{
2451    trace_e1000e_irq_write_ics(val);
2452    e1000e_set_interrupt_cause(core, val);
2453}
2454
2455static void
2456e1000e_set_icr(E1000ECore *core, int index, uint32_t val)
2457{
2458    uint32_t icr = 0;
2459    if ((core->mac[ICR] & E1000_ICR_ASSERTED) &&
2460        (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
2461        trace_e1000e_irq_icr_process_iame();
2462        e1000e_clear_ims_bits(core, core->mac[IAM]);
2463    }
2464
2465    icr = core->mac[ICR] & ~val;
2466    /* Windows driver expects that the "receive overrun" bit and other
2467     * ones to be cleared when the "Other" bit (#24) is cleared.
2468     */
2469    icr = (val & E1000_ICR_OTHER) ? (icr & ~E1000_ICR_OTHER_CAUSES) : icr;
2470    trace_e1000e_irq_icr_write(val, core->mac[ICR], icr);
2471    core->mac[ICR] = icr;
2472    e1000e_update_interrupt_state(core);
2473}
2474
2475static void
2476e1000e_set_imc(E1000ECore *core, int index, uint32_t val)
2477{
2478    trace_e1000e_irq_ims_clear_set_imc(val);
2479    e1000e_clear_ims_bits(core, val);
2480    e1000e_update_interrupt_state(core);
2481}
2482
2483static void
2484e1000e_set_ims(E1000ECore *core, int index, uint32_t val)
2485{
2486    static const uint32_t ims_ext_mask =
2487        E1000_IMS_RXQ0 | E1000_IMS_RXQ1 |
2488        E1000_IMS_TXQ0 | E1000_IMS_TXQ1 |
2489        E1000_IMS_OTHER;
2490
2491    static const uint32_t ims_valid_mask =
2492        E1000_IMS_TXDW      | E1000_IMS_TXQE    | E1000_IMS_LSC  |
2493        E1000_IMS_RXDMT0    | E1000_IMS_RXO     | E1000_IMS_RXT0 |
2494        E1000_IMS_MDAC      | E1000_IMS_TXD_LOW | E1000_IMS_SRPD |
2495        E1000_IMS_ACK       | E1000_IMS_MNG     | E1000_IMS_RXQ0 |
2496        E1000_IMS_RXQ1      | E1000_IMS_TXQ0    | E1000_IMS_TXQ1 |
2497        E1000_IMS_OTHER;
2498
2499    uint32_t valid_val = val & ims_valid_mask;
2500
2501    trace_e1000e_irq_set_ims(val, core->mac[IMS], core->mac[IMS] | valid_val);
2502    core->mac[IMS] |= valid_val;
2503
2504    if ((valid_val & ims_ext_mask) &&
2505        (core->mac[CTRL_EXT] & E1000_CTRL_EXT_PBA_CLR) &&
2506        msix_enabled(core->owner)) {
2507        e1000e_msix_clear(core, valid_val);
2508    }
2509
2510    if ((valid_val == ims_valid_mask) &&
2511        (core->mac[CTRL_EXT] & E1000_CTRL_EXT_INT_TIMERS_CLEAR_ENA)) {
2512        trace_e1000e_irq_fire_all_timers(val);
2513        e1000e_intrmgr_fire_all_timers(core);
2514    }
2515
2516    e1000e_update_interrupt_state(core);
2517}
2518
2519static void
2520e1000e_set_rdtr(E1000ECore *core, int index, uint32_t val)
2521{
2522    e1000e_set_16bit(core, index, val);
2523
2524    if ((val & E1000_RDTR_FPD) && (core->rdtr.running)) {
2525        trace_e1000e_irq_rdtr_fpd_running();
2526        e1000e_intrmgr_fire_delayed_interrupts(core);
2527    } else {
2528        trace_e1000e_irq_rdtr_fpd_not_running();
2529    }
2530}
2531
2532static void
2533e1000e_set_tidv(E1000ECore *core, int index, uint32_t val)
2534{
2535    e1000e_set_16bit(core, index, val);
2536
2537    if ((val & E1000_TIDV_FPD) && (core->tidv.running)) {
2538        trace_e1000e_irq_tidv_fpd_running();
2539        e1000e_intrmgr_fire_delayed_interrupts(core);
2540    } else {
2541        trace_e1000e_irq_tidv_fpd_not_running();
2542    }
2543}
2544
2545static uint32_t
2546e1000e_mac_readreg(E1000ECore *core, int index)
2547{
2548    return core->mac[index];
2549}
2550
2551static uint32_t
2552e1000e_mac_ics_read(E1000ECore *core, int index)
2553{
2554    trace_e1000e_irq_read_ics(core->mac[ICS]);
2555    return core->mac[ICS];
2556}
2557
2558static uint32_t
2559e1000e_mac_ims_read(E1000ECore *core, int index)
2560{
2561    trace_e1000e_irq_read_ims(core->mac[IMS]);
2562    return core->mac[IMS];
2563}
2564
2565#define E1000E_LOW_BITS_READ_FUNC(num)                      \
2566    static uint32_t                                         \
2567    e1000e_mac_low##num##_read(E1000ECore *core, int index) \
2568    {                                                       \
2569        return core->mac[index] & (BIT(num) - 1);           \
2570    }                                                       \
2571
2572#define E1000E_LOW_BITS_READ(num)                           \
2573    e1000e_mac_low##num##_read
2574
2575E1000E_LOW_BITS_READ_FUNC(4);
2576E1000E_LOW_BITS_READ_FUNC(6);
2577E1000E_LOW_BITS_READ_FUNC(11);
2578E1000E_LOW_BITS_READ_FUNC(13);
2579E1000E_LOW_BITS_READ_FUNC(16);
2580
2581static uint32_t
2582e1000e_mac_swsm_read(E1000ECore *core, int index)
2583{
2584    uint32_t val = core->mac[SWSM];
2585    core->mac[SWSM] = val | 1;
2586    return val;
2587}
2588
2589static uint32_t
2590e1000e_mac_itr_read(E1000ECore *core, int index)
2591{
2592    return core->itr_guest_value;
2593}
2594
2595static uint32_t
2596e1000e_mac_eitr_read(E1000ECore *core, int index)
2597{
2598    return core->eitr_guest_value[index - EITR];
2599}
2600
2601static uint32_t
2602e1000e_mac_icr_read(E1000ECore *core, int index)
2603{
2604    uint32_t ret = core->mac[ICR];
2605    trace_e1000e_irq_icr_read_entry(ret);
2606
2607    if (core->mac[IMS] == 0) {
2608        trace_e1000e_irq_icr_clear_zero_ims();
2609        core->mac[ICR] = 0;
2610    }
2611
2612    if ((core->mac[ICR] & E1000_ICR_ASSERTED) &&
2613        (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
2614        trace_e1000e_irq_icr_clear_iame();
2615        core->mac[ICR] = 0;
2616        trace_e1000e_irq_icr_process_iame();
2617        e1000e_clear_ims_bits(core, core->mac[IAM]);
2618    }
2619
2620    trace_e1000e_irq_icr_read_exit(core->mac[ICR]);
2621    e1000e_update_interrupt_state(core);
2622    return ret;
2623}
2624
2625static uint32_t
2626e1000e_mac_read_clr4(E1000ECore *core, int index)
2627{
2628    uint32_t ret = core->mac[index];
2629
2630    core->mac[index] = 0;
2631    return ret;
2632}
2633
2634static uint32_t
2635e1000e_mac_read_clr8(E1000ECore *core, int index)
2636{
2637    uint32_t ret = core->mac[index];
2638
2639    core->mac[index] = 0;
2640    core->mac[index - 1] = 0;
2641    return ret;
2642}
2643
2644static uint32_t
2645e1000e_get_ctrl(E1000ECore *core, int index)
2646{
2647    uint32_t val = core->mac[CTRL];
2648
2649    trace_e1000e_link_read_params(
2650        !!(val & E1000_CTRL_ASDE),
2651        (val & E1000_CTRL_SPD_SEL) >> E1000_CTRL_SPD_SHIFT,
2652        !!(val & E1000_CTRL_FRCSPD),
2653        !!(val & E1000_CTRL_FRCDPX),
2654        !!(val & E1000_CTRL_RFCE),
2655        !!(val & E1000_CTRL_TFCE));
2656
2657    return val;
2658}
2659
2660static uint32_t
2661e1000e_get_status(E1000ECore *core, int index)
2662{
2663    uint32_t res = core->mac[STATUS];
2664
2665    if (!(core->mac[CTRL] & E1000_CTRL_GIO_MASTER_DISABLE)) {
2666        res |= E1000_STATUS_GIO_MASTER_ENABLE;
2667    }
2668
2669    if (core->mac[CTRL] & E1000_CTRL_FRCDPX) {
2670        res |= (core->mac[CTRL] & E1000_CTRL_FD) ? E1000_STATUS_FD : 0;
2671    } else {
2672        res |= E1000_STATUS_FD;
2673    }
2674
2675    if ((core->mac[CTRL] & E1000_CTRL_FRCSPD) ||
2676        (core->mac[CTRL_EXT] & E1000_CTRL_EXT_SPD_BYPS)) {
2677        switch (core->mac[CTRL] & E1000_CTRL_SPD_SEL) {
2678        case E1000_CTRL_SPD_10:
2679            res |= E1000_STATUS_SPEED_10;
2680            break;
2681        case E1000_CTRL_SPD_100:
2682            res |= E1000_STATUS_SPEED_100;
2683            break;
2684        case E1000_CTRL_SPD_1000:
2685        default:
2686            res |= E1000_STATUS_SPEED_1000;
2687            break;
2688        }
2689    } else {
2690        res |= E1000_STATUS_SPEED_1000;
2691    }
2692
2693    trace_e1000e_link_status(
2694        !!(res & E1000_STATUS_LU),
2695        !!(res & E1000_STATUS_FD),
2696        (res & E1000_STATUS_SPEED_MASK) >> E1000_STATUS_SPEED_SHIFT,
2697        (res & E1000_STATUS_ASDV) >> E1000_STATUS_ASDV_SHIFT);
2698
2699    return res;
2700}
2701
2702static uint32_t
2703e1000e_get_tarc(E1000ECore *core, int index)
2704{
2705    return core->mac[index] & ((BIT(11) - 1) |
2706                                BIT(27)      |
2707                                BIT(28)      |
2708                                BIT(29)      |
2709                                BIT(30));
2710}
2711
2712static void
2713e1000e_mac_writereg(E1000ECore *core, int index, uint32_t val)
2714{
2715    core->mac[index] = val;
2716}
2717
2718static void
2719e1000e_mac_setmacaddr(E1000ECore *core, int index, uint32_t val)
2720{
2721    uint32_t macaddr[2];
2722
2723    core->mac[index] = val;
2724
2725    macaddr[0] = cpu_to_le32(core->mac[RA]);
2726    macaddr[1] = cpu_to_le32(core->mac[RA + 1]);
2727    qemu_format_nic_info_str(qemu_get_queue(core->owner_nic),
2728        (uint8_t *) macaddr);
2729
2730    trace_e1000e_mac_set_sw(MAC_ARG(macaddr));
2731}
2732
2733static void
2734e1000e_set_eecd(E1000ECore *core, int index, uint32_t val)
2735{
2736    static const uint32_t ro_bits = E1000_EECD_PRES          |
2737                                    E1000_EECD_AUTO_RD       |
2738                                    E1000_EECD_SIZE_EX_MASK;
2739
2740    core->mac[EECD] = (core->mac[EECD] & ro_bits) | (val & ~ro_bits);
2741}
2742
2743static void
2744e1000e_set_eerd(E1000ECore *core, int index, uint32_t val)
2745{
2746    uint32_t addr = (val >> E1000_EERW_ADDR_SHIFT) & E1000_EERW_ADDR_MASK;
2747    uint32_t flags = 0;
2748    uint32_t data = 0;
2749
2750    if ((addr < E1000E_EEPROM_SIZE) && (val & E1000_EERW_START)) {
2751        data = core->eeprom[addr];
2752        flags = E1000_EERW_DONE;
2753    }
2754
2755    core->mac[EERD] = flags                           |
2756                      (addr << E1000_EERW_ADDR_SHIFT) |
2757                      (data << E1000_EERW_DATA_SHIFT);
2758}
2759
2760static void
2761e1000e_set_eewr(E1000ECore *core, int index, uint32_t val)
2762{
2763    uint32_t addr = (val >> E1000_EERW_ADDR_SHIFT) & E1000_EERW_ADDR_MASK;
2764    uint32_t data = (val >> E1000_EERW_DATA_SHIFT) & E1000_EERW_DATA_MASK;
2765    uint32_t flags = 0;
2766
2767    if ((addr < E1000E_EEPROM_SIZE) && (val & E1000_EERW_START)) {
2768        core->eeprom[addr] = data;
2769        flags = E1000_EERW_DONE;
2770    }
2771
2772    core->mac[EERD] = flags                           |
2773                      (addr << E1000_EERW_ADDR_SHIFT) |
2774                      (data << E1000_EERW_DATA_SHIFT);
2775}
2776
2777static void
2778e1000e_set_rxdctl(E1000ECore *core, int index, uint32_t val)
2779{
2780    core->mac[RXDCTL] = core->mac[RXDCTL1] = val;
2781}
2782
2783static void
2784e1000e_set_itr(E1000ECore *core, int index, uint32_t val)
2785{
2786    uint32_t interval = val & 0xffff;
2787
2788    trace_e1000e_irq_itr_set(val);
2789
2790    core->itr_guest_value = interval;
2791    core->mac[index] = MAX(interval, E1000E_MIN_XITR);
2792}
2793
2794static void
2795e1000e_set_eitr(E1000ECore *core, int index, uint32_t val)
2796{
2797    uint32_t interval = val & 0xffff;
2798    uint32_t eitr_num = index - EITR;
2799
2800    trace_e1000e_irq_eitr_set(eitr_num, val);
2801
2802    core->eitr_guest_value[eitr_num] = interval;
2803    core->mac[index] = MAX(interval, E1000E_MIN_XITR);
2804}
2805
2806static void
2807e1000e_set_psrctl(E1000ECore *core, int index, uint32_t val)
2808{
2809    if (core->mac[RCTL] & E1000_RCTL_DTYP_MASK) {
2810
2811        if ((val & E1000_PSRCTL_BSIZE0_MASK) == 0) {
2812            qemu_log_mask(LOG_GUEST_ERROR,
2813                          "e1000e: PSRCTL.BSIZE0 cannot be zero");
2814            return;
2815        }
2816
2817        if ((val & E1000_PSRCTL_BSIZE1_MASK) == 0) {
2818            qemu_log_mask(LOG_GUEST_ERROR,
2819                          "e1000e: PSRCTL.BSIZE1 cannot be zero");
2820            return;
2821        }
2822    }
2823
2824    core->mac[PSRCTL] = val;
2825}
2826
2827static void
2828e1000e_update_rx_offloads(E1000ECore *core)
2829{
2830    int cso_state = e1000e_rx_l4_cso_enabled(core);
2831
2832    trace_e1000e_rx_set_cso(cso_state);
2833
2834    if (core->has_vnet) {
2835        qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
2836                         cso_state, 0, 0, 0, 0);
2837    }
2838}
2839
2840static void
2841e1000e_set_rxcsum(E1000ECore *core, int index, uint32_t val)
2842{
2843    core->mac[RXCSUM] = val;
2844    e1000e_update_rx_offloads(core);
2845}
2846
2847static void
2848e1000e_set_gcr(E1000ECore *core, int index, uint32_t val)
2849{
2850    uint32_t ro_bits = core->mac[GCR] & E1000_GCR_RO_BITS;
2851    core->mac[GCR] = (val & ~E1000_GCR_RO_BITS) | ro_bits;
2852}
2853
2854#define e1000e_getreg(x)    [x] = e1000e_mac_readreg
2855typedef uint32_t (*readops)(E1000ECore *, int);
2856static const readops e1000e_macreg_readops[] = {
2857    e1000e_getreg(PBA),
2858    e1000e_getreg(WUFC),
2859    e1000e_getreg(MANC),
2860    e1000e_getreg(TOTL),
2861    e1000e_getreg(RDT0),
2862    e1000e_getreg(RDBAH0),
2863    e1000e_getreg(TDBAL1),
2864    e1000e_getreg(RDLEN0),
2865    e1000e_getreg(RDH1),
2866    e1000e_getreg(LATECOL),
2867    e1000e_getreg(SEQEC),
2868    e1000e_getreg(XONTXC),
2869    e1000e_getreg(WUS),
2870    e1000e_getreg(GORCL),
2871    e1000e_getreg(MGTPRC),
2872    e1000e_getreg(EERD),
2873    e1000e_getreg(EIAC),
2874    e1000e_getreg(PSRCTL),
2875    e1000e_getreg(MANC2H),
2876    e1000e_getreg(RXCSUM),
2877    e1000e_getreg(GSCL_3),
2878    e1000e_getreg(GSCN_2),
2879    e1000e_getreg(RSRPD),
2880    e1000e_getreg(RDBAL1),
2881    e1000e_getreg(FCAH),
2882    e1000e_getreg(FCRTH),
2883    e1000e_getreg(FLOP),
2884    e1000e_getreg(FLASHT),
2885    e1000e_getreg(RXSTMPH),
2886    e1000e_getreg(TXSTMPL),
2887    e1000e_getreg(TIMADJL),
2888    e1000e_getreg(TXDCTL),
2889    e1000e_getreg(RDH0),
2890    e1000e_getreg(TDT1),
2891    e1000e_getreg(TNCRS),
2892    e1000e_getreg(RJC),
2893    e1000e_getreg(IAM),
2894    e1000e_getreg(GSCL_2),
2895    e1000e_getreg(RDBAH1),
2896    e1000e_getreg(FLSWDATA),
2897    e1000e_getreg(RXSATRH),
2898    e1000e_getreg(TIPG),
2899    e1000e_getreg(FLMNGCTL),
2900    e1000e_getreg(FLMNGCNT),
2901    e1000e_getreg(TSYNCTXCTL),
2902    e1000e_getreg(EXTCNF_SIZE),
2903    e1000e_getreg(EXTCNF_CTRL),
2904    e1000e_getreg(EEMNGDATA),
2905    e1000e_getreg(CTRL_EXT),
2906    e1000e_getreg(SYSTIMH),
2907    e1000e_getreg(EEMNGCTL),
2908    e1000e_getreg(FLMNGDATA),
2909    e1000e_getreg(TSYNCRXCTL),
2910    e1000e_getreg(TDH),
2911    e1000e_getreg(LEDCTL),
2912    e1000e_getreg(TCTL),
2913    e1000e_getreg(TDBAL),
2914    e1000e_getreg(TDLEN),
2915    e1000e_getreg(TDH1),
2916    e1000e_getreg(RADV),
2917    e1000e_getreg(ECOL),
2918    e1000e_getreg(DC),
2919    e1000e_getreg(RLEC),
2920    e1000e_getreg(XOFFTXC),
2921    e1000e_getreg(RFC),
2922    e1000e_getreg(RNBC),
2923    e1000e_getreg(MGTPTC),
2924    e1000e_getreg(TIMINCA),
2925    e1000e_getreg(RXCFGL),
2926    e1000e_getreg(MFUTP01),
2927    e1000e_getreg(FACTPS),
2928    e1000e_getreg(GSCL_1),
2929    e1000e_getreg(GSCN_0),
2930    e1000e_getreg(GCR2),
2931    e1000e_getreg(RDT1),
2932    e1000e_getreg(PBACLR),
2933    e1000e_getreg(FCTTV),
2934    e1000e_getreg(EEWR),
2935    e1000e_getreg(FLSWCTL),
2936    e1000e_getreg(RXDCTL1),
2937    e1000e_getreg(RXSATRL),
2938    e1000e_getreg(SYSTIML),
2939    e1000e_getreg(RXUDP),
2940    e1000e_getreg(TORL),
2941    e1000e_getreg(TDLEN1),
2942    e1000e_getreg(MCC),
2943    e1000e_getreg(WUC),
2944    e1000e_getreg(EECD),
2945    e1000e_getreg(MFUTP23),
2946    e1000e_getreg(RAID),
2947    e1000e_getreg(FCRTV),
2948    e1000e_getreg(TXDCTL1),
2949    e1000e_getreg(RCTL),
2950    e1000e_getreg(TDT),
2951    e1000e_getreg(MDIC),
2952    e1000e_getreg(FCRUC),
2953    e1000e_getreg(VET),
2954    e1000e_getreg(RDBAL0),
2955    e1000e_getreg(TDBAH1),
2956    e1000e_getreg(RDTR),
2957    e1000e_getreg(SCC),
2958    e1000e_getreg(COLC),
2959    e1000e_getreg(CEXTERR),
2960    e1000e_getreg(XOFFRXC),
2961    e1000e_getreg(IPAV),
2962    e1000e_getreg(GOTCL),
2963    e1000e_getreg(MGTPDC),
2964    e1000e_getreg(GCR),
2965    e1000e_getreg(IVAR),
2966    e1000e_getreg(POEMB),
2967    e1000e_getreg(MFVAL),
2968    e1000e_getreg(FUNCTAG),
2969    e1000e_getreg(GSCL_4),
2970    e1000e_getreg(GSCN_3),
2971    e1000e_getreg(MRQC),
2972    e1000e_getreg(RDLEN1),
2973    e1000e_getreg(FCT),
2974    e1000e_getreg(FLA),
2975    e1000e_getreg(FLOL),
2976    e1000e_getreg(RXDCTL),
2977    e1000e_getreg(RXSTMPL),
2978    e1000e_getreg(TXSTMPH),
2979    e1000e_getreg(TIMADJH),
2980    e1000e_getreg(FCRTL),
2981    e1000e_getreg(TDBAH),
2982    e1000e_getreg(TADV),
2983    e1000e_getreg(XONRXC),
2984    e1000e_getreg(TSCTFC),
2985    e1000e_getreg(RFCTL),
2986    e1000e_getreg(GSCN_1),
2987    e1000e_getreg(FCAL),
2988    e1000e_getreg(FLSWCNT),
2989
2990    [TOTH]    = e1000e_mac_read_clr8,
2991    [GOTCH]   = e1000e_mac_read_clr8,
2992    [PRC64]   = e1000e_mac_read_clr4,
2993    [PRC255]  = e1000e_mac_read_clr4,
2994    [PRC1023] = e1000e_mac_read_clr4,
2995    [PTC64]   = e1000e_mac_read_clr4,
2996    [PTC255]  = e1000e_mac_read_clr4,
2997    [PTC1023] = e1000e_mac_read_clr4,
2998    [GPRC]    = e1000e_mac_read_clr4,
2999    [TPT]     = e1000e_mac_read_clr4,
3000    [RUC]     = e1000e_mac_read_clr4,
3001    [BPRC]    = e1000e_mac_read_clr4,
3002    [MPTC]    = e1000e_mac_read_clr4,
3003    [IAC]     = e1000e_mac_read_clr4,
3004    [ICR]     = e1000e_mac_icr_read,
3005    [RDFH]    = E1000E_LOW_BITS_READ(13),
3006    [RDFHS]   = E1000E_LOW_BITS_READ(13),
3007    [RDFPC]   = E1000E_LOW_BITS_READ(13),
3008    [TDFH]    = E1000E_LOW_BITS_READ(13),
3009    [TDFHS]   = E1000E_LOW_BITS_READ(13),
3010    [STATUS]  = e1000e_get_status,
3011    [TARC0]   = e1000e_get_tarc,
3012    [PBS]     = E1000E_LOW_BITS_READ(6),
3013    [ICS]     = e1000e_mac_ics_read,
3014    [AIT]     = E1000E_LOW_BITS_READ(16),
3015    [TORH]    = e1000e_mac_read_clr8,
3016    [GORCH]   = e1000e_mac_read_clr8,
3017    [PRC127]  = e1000e_mac_read_clr4,
3018    [PRC511]  = e1000e_mac_read_clr4,
3019    [PRC1522] = e1000e_mac_read_clr4,
3020    [PTC127]  = e1000e_mac_read_clr4,
3021    [PTC511]  = e1000e_mac_read_clr4,
3022    [PTC1522] = e1000e_mac_read_clr4,
3023    [GPTC]    = e1000e_mac_read_clr4,
3024    [TPR]     = e1000e_mac_read_clr4,
3025    [ROC]     = e1000e_mac_read_clr4,
3026    [MPRC]    = e1000e_mac_read_clr4,
3027    [BPTC]    = e1000e_mac_read_clr4,
3028    [TSCTC]   = e1000e_mac_read_clr4,
3029    [ITR]     = e1000e_mac_itr_read,
3030    [RDFT]    = E1000E_LOW_BITS_READ(13),
3031    [RDFTS]   = E1000E_LOW_BITS_READ(13),
3032    [TDFPC]   = E1000E_LOW_BITS_READ(13),
3033    [TDFT]    = E1000E_LOW_BITS_READ(13),
3034    [TDFTS]   = E1000E_LOW_BITS_READ(13),
3035    [CTRL]    = e1000e_get_ctrl,
3036    [TARC1]   = e1000e_get_tarc,
3037    [SWSM]    = e1000e_mac_swsm_read,
3038    [IMS]     = e1000e_mac_ims_read,
3039
3040    [CRCERRS ... MPC]      = e1000e_mac_readreg,
3041    [IP6AT ... IP6AT + 3]  = e1000e_mac_readreg,
3042    [IP4AT ... IP4AT + 6]  = e1000e_mac_readreg,
3043    [RA ... RA + 31]       = e1000e_mac_readreg,
3044    [WUPM ... WUPM + 31]   = e1000e_mac_readreg,
3045    [MTA ... MTA + 127]    = e1000e_mac_readreg,
3046    [VFTA ... VFTA + 127]  = e1000e_mac_readreg,
3047    [FFMT ... FFMT + 254]  = E1000E_LOW_BITS_READ(4),
3048    [FFVT ... FFVT + 254]  = e1000e_mac_readreg,
3049    [MDEF ... MDEF + 7]    = e1000e_mac_readreg,
3050    [FFLT ... FFLT + 10]   = E1000E_LOW_BITS_READ(11),
3051    [FTFT ... FTFT + 254]  = e1000e_mac_readreg,
3052    [PBM ... PBM + 10239]  = e1000e_mac_readreg,
3053    [RETA ... RETA + 31]   = e1000e_mac_readreg,
3054    [RSSRK ... RSSRK + 31] = e1000e_mac_readreg,
3055    [MAVTV0 ... MAVTV3]    = e1000e_mac_readreg,
3056    [EITR...EITR + E1000E_MSIX_VEC_NUM - 1] = e1000e_mac_eitr_read
3057};
3058enum { E1000E_NREADOPS = ARRAY_SIZE(e1000e_macreg_readops) };
3059
3060#define e1000e_putreg(x)    [x] = e1000e_mac_writereg
3061typedef void (*writeops)(E1000ECore *, int, uint32_t);
3062static const writeops e1000e_macreg_writeops[] = {
3063    e1000e_putreg(PBA),
3064    e1000e_putreg(SWSM),
3065    e1000e_putreg(WUFC),
3066    e1000e_putreg(RDBAH1),
3067    e1000e_putreg(TDBAH),
3068    e1000e_putreg(TXDCTL),
3069    e1000e_putreg(RDBAH0),
3070    e1000e_putreg(LEDCTL),
3071    e1000e_putreg(FCAL),
3072    e1000e_putreg(FCRUC),
3073    e1000e_putreg(AIT),
3074    e1000e_putreg(TDFH),
3075    e1000e_putreg(TDFT),
3076    e1000e_putreg(TDFHS),
3077    e1000e_putreg(TDFTS),
3078    e1000e_putreg(TDFPC),
3079    e1000e_putreg(WUC),
3080    e1000e_putreg(WUS),
3081    e1000e_putreg(RDFH),
3082    e1000e_putreg(RDFT),
3083    e1000e_putreg(RDFHS),
3084    e1000e_putreg(RDFTS),
3085    e1000e_putreg(RDFPC),
3086    e1000e_putreg(IPAV),
3087    e1000e_putreg(TDBAH1),
3088    e1000e_putreg(TIMINCA),
3089    e1000e_putreg(IAM),
3090    e1000e_putreg(EIAC),
3091    e1000e_putreg(IVAR),
3092    e1000e_putreg(TARC0),
3093    e1000e_putreg(TARC1),
3094    e1000e_putreg(FLSWDATA),
3095    e1000e_putreg(POEMB),
3096    e1000e_putreg(PBS),
3097    e1000e_putreg(MFUTP01),
3098    e1000e_putreg(MFUTP23),
3099    e1000e_putreg(MANC),
3100    e1000e_putreg(MANC2H),
3101    e1000e_putreg(MFVAL),
3102    e1000e_putreg(EXTCNF_CTRL),
3103    e1000e_putreg(FACTPS),
3104    e1000e_putreg(FUNCTAG),
3105    e1000e_putreg(GSCL_1),
3106    e1000e_putreg(GSCL_2),
3107    e1000e_putreg(GSCL_3),
3108    e1000e_putreg(GSCL_4),
3109    e1000e_putreg(GSCN_0),
3110    e1000e_putreg(GSCN_1),
3111    e1000e_putreg(GSCN_2),
3112    e1000e_putreg(GSCN_3),
3113    e1000e_putreg(GCR2),
3114    e1000e_putreg(MRQC),
3115    e1000e_putreg(FLOP),
3116    e1000e_putreg(FLOL),
3117    e1000e_putreg(FLSWCTL),
3118    e1000e_putreg(FLSWCNT),
3119    e1000e_putreg(FLA),
3120    e1000e_putreg(RXDCTL1),
3121    e1000e_putreg(TXDCTL1),
3122    e1000e_putreg(TIPG),
3123    e1000e_putreg(RXSTMPH),
3124    e1000e_putreg(RXSTMPL),
3125    e1000e_putreg(RXSATRL),
3126    e1000e_putreg(RXSATRH),
3127    e1000e_putreg(TXSTMPL),
3128    e1000e_putreg(TXSTMPH),
3129    e1000e_putreg(SYSTIML),
3130    e1000e_putreg(SYSTIMH),
3131    e1000e_putreg(TIMADJL),
3132    e1000e_putreg(TIMADJH),
3133    e1000e_putreg(RXUDP),
3134    e1000e_putreg(RXCFGL),
3135    e1000e_putreg(TSYNCRXCTL),
3136    e1000e_putreg(TSYNCTXCTL),
3137    e1000e_putreg(EXTCNF_SIZE),
3138    e1000e_putreg(EEMNGCTL),
3139    e1000e_putreg(RA),
3140
3141    [TDH1]     = e1000e_set_16bit,
3142    [TDT1]     = e1000e_set_tdt,
3143    [TCTL]     = e1000e_set_tctl,
3144    [TDT]      = e1000e_set_tdt,
3145    [MDIC]     = e1000e_set_mdic,
3146    [ICS]      = e1000e_set_ics,
3147    [TDH]      = e1000e_set_16bit,
3148    [RDH0]     = e1000e_set_16bit,
3149    [RDT0]     = e1000e_set_rdt,
3150    [IMC]      = e1000e_set_imc,
3151    [IMS]      = e1000e_set_ims,
3152    [ICR]      = e1000e_set_icr,
3153    [EECD]     = e1000e_set_eecd,
3154    [RCTL]     = e1000e_set_rx_control,
3155    [CTRL]     = e1000e_set_ctrl,
3156    [RDTR]     = e1000e_set_rdtr,
3157    [RADV]     = e1000e_set_16bit,
3158    [TADV]     = e1000e_set_16bit,
3159    [ITR]      = e1000e_set_itr,
3160    [EERD]     = e1000e_set_eerd,
3161    [GCR]      = e1000e_set_gcr,
3162    [PSRCTL]   = e1000e_set_psrctl,
3163    [RXCSUM]   = e1000e_set_rxcsum,
3164    [RAID]     = e1000e_set_16bit,
3165    [RSRPD]    = e1000e_set_12bit,
3166    [TIDV]     = e1000e_set_tidv,
3167    [TDLEN1]   = e1000e_set_dlen,
3168    [TDLEN]    = e1000e_set_dlen,
3169    [RDLEN0]   = e1000e_set_dlen,
3170    [RDLEN1]   = e1000e_set_dlen,
3171    [TDBAL]    = e1000e_set_dbal,
3172    [TDBAL1]   = e1000e_set_dbal,
3173    [RDBAL0]   = e1000e_set_dbal,
3174    [RDBAL1]   = e1000e_set_dbal,
3175    [RDH1]     = e1000e_set_16bit,
3176    [RDT1]     = e1000e_set_rdt,
3177    [STATUS]   = e1000e_set_status,
3178    [PBACLR]   = e1000e_set_pbaclr,
3179    [CTRL_EXT] = e1000e_set_ctrlext,
3180    [FCAH]     = e1000e_set_16bit,
3181    [FCT]      = e1000e_set_16bit,
3182    [FCTTV]    = e1000e_set_16bit,
3183    [FCRTV]    = e1000e_set_16bit,
3184    [FCRTH]    = e1000e_set_fcrth,
3185    [FCRTL]    = e1000e_set_fcrtl,
3186    [VET]      = e1000e_set_vet,
3187    [RXDCTL]   = e1000e_set_rxdctl,
3188    [FLASHT]   = e1000e_set_16bit,
3189    [EEWR]     = e1000e_set_eewr,
3190    [CTRL_DUP] = e1000e_set_ctrl,
3191    [RFCTL]    = e1000e_set_rfctl,
3192    [RA + 1]   = e1000e_mac_setmacaddr,
3193
3194    [IP6AT ... IP6AT + 3]    = e1000e_mac_writereg,
3195    [IP4AT ... IP4AT + 6]    = e1000e_mac_writereg,
3196    [RA + 2 ... RA + 31]     = e1000e_mac_writereg,
3197    [WUPM ... WUPM + 31]     = e1000e_mac_writereg,
3198    [MTA ... MTA + 127]      = e1000e_mac_writereg,
3199    [VFTA ... VFTA + 127]    = e1000e_mac_writereg,
3200    [FFMT ... FFMT + 254]    = e1000e_mac_writereg,
3201    [FFVT ... FFVT + 254]    = e1000e_mac_writereg,
3202    [PBM ... PBM + 10239]    = e1000e_mac_writereg,
3203    [MDEF ... MDEF + 7]      = e1000e_mac_writereg,
3204    [FFLT ... FFLT + 10]     = e1000e_mac_writereg,
3205    [FTFT ... FTFT + 254]    = e1000e_mac_writereg,
3206    [RETA ... RETA + 31]     = e1000e_mac_writereg,
3207    [RSSRK ... RSSRK + 31]   = e1000e_mac_writereg,
3208    [MAVTV0 ... MAVTV3]      = e1000e_mac_writereg,
3209    [EITR...EITR + E1000E_MSIX_VEC_NUM - 1] = e1000e_set_eitr
3210};
3211enum { E1000E_NWRITEOPS = ARRAY_SIZE(e1000e_macreg_writeops) };
3212
3213enum { MAC_ACCESS_PARTIAL = 1 };
3214
3215/* The array below combines alias offsets of the index values for the
3216 * MAC registers that have aliases, with the indication of not fully
3217 * implemented registers (lowest bit). This combination is possible
3218 * because all of the offsets are even. */
3219static const uint16_t mac_reg_access[E1000E_MAC_SIZE] = {
3220    /* Alias index offsets */
3221    [FCRTL_A] = 0x07fe, [FCRTH_A] = 0x0802,
3222    [RDH0_A]  = 0x09bc, [RDT0_A]  = 0x09bc, [RDTR_A] = 0x09c6,
3223    [RDFH_A]  = 0xe904, [RDFT_A]  = 0xe904,
3224    [TDH_A]   = 0x0cf8, [TDT_A]   = 0x0cf8, [TIDV_A] = 0x0cf8,
3225    [TDFH_A]  = 0xed00, [TDFT_A]  = 0xed00,
3226    [RA_A ... RA_A + 31]      = 0x14f0,
3227    [VFTA_A ... VFTA_A + 127] = 0x1400,
3228    [RDBAL0_A ... RDLEN0_A] = 0x09bc,
3229    [TDBAL_A ... TDLEN_A]   = 0x0cf8,
3230    /* Access options */
3231    [RDFH]  = MAC_ACCESS_PARTIAL,    [RDFT]  = MAC_ACCESS_PARTIAL,
3232    [RDFHS] = MAC_ACCESS_PARTIAL,    [RDFTS] = MAC_ACCESS_PARTIAL,
3233    [RDFPC] = MAC_ACCESS_PARTIAL,
3234    [TDFH]  = MAC_ACCESS_PARTIAL,    [TDFT]  = MAC_ACCESS_PARTIAL,
3235    [TDFHS] = MAC_ACCESS_PARTIAL,    [TDFTS] = MAC_ACCESS_PARTIAL,
3236    [TDFPC] = MAC_ACCESS_PARTIAL,    [EECD]  = MAC_ACCESS_PARTIAL,
3237    [PBM]   = MAC_ACCESS_PARTIAL,    [FLA]   = MAC_ACCESS_PARTIAL,
3238    [FCAL]  = MAC_ACCESS_PARTIAL,    [FCAH]  = MAC_ACCESS_PARTIAL,
3239    [FCT]   = MAC_ACCESS_PARTIAL,    [FCTTV] = MAC_ACCESS_PARTIAL,
3240    [FCRTV] = MAC_ACCESS_PARTIAL,    [FCRTL] = MAC_ACCESS_PARTIAL,
3241    [FCRTH] = MAC_ACCESS_PARTIAL,    [TXDCTL] = MAC_ACCESS_PARTIAL,
3242    [TXDCTL1] = MAC_ACCESS_PARTIAL,
3243    [MAVTV0 ... MAVTV3] = MAC_ACCESS_PARTIAL
3244};
3245
3246void
3247e1000e_core_write(E1000ECore *core, hwaddr addr, uint64_t val, unsigned size)
3248{
3249    uint16_t index = e1000e_get_reg_index_with_offset(mac_reg_access, addr);
3250
3251    if (index < E1000E_NWRITEOPS && e1000e_macreg_writeops[index]) {
3252        if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
3253            trace_e1000e_wrn_regs_write_trivial(index << 2);
3254        }
3255        trace_e1000e_core_write(index << 2, size, val);
3256        e1000e_macreg_writeops[index](core, index, val);
3257    } else if (index < E1000E_NREADOPS && e1000e_macreg_readops[index]) {
3258        trace_e1000e_wrn_regs_write_ro(index << 2, size, val);
3259    } else {
3260        trace_e1000e_wrn_regs_write_unknown(index << 2, size, val);
3261    }
3262}
3263
3264uint64_t
3265e1000e_core_read(E1000ECore *core, hwaddr addr, unsigned size)
3266{
3267    uint64_t val;
3268    uint16_t index = e1000e_get_reg_index_with_offset(mac_reg_access, addr);
3269
3270    if (index < E1000E_NREADOPS && e1000e_macreg_readops[index]) {
3271        if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
3272            trace_e1000e_wrn_regs_read_trivial(index << 2);
3273        }
3274        val = e1000e_macreg_readops[index](core, index);
3275        trace_e1000e_core_read(index << 2, size, val);
3276        return val;
3277    } else {
3278        trace_e1000e_wrn_regs_read_unknown(index << 2, size);
3279    }
3280    return 0;
3281}
3282
3283static inline void
3284e1000e_autoneg_pause(E1000ECore *core)
3285{
3286    timer_del(core->autoneg_timer);
3287}
3288
3289static void
3290e1000e_autoneg_resume(E1000ECore *core)
3291{
3292    if (e1000e_have_autoneg(core) &&
3293        !(core->phy[0][PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
3294        qemu_get_queue(core->owner_nic)->link_down = false;
3295        timer_mod(core->autoneg_timer,
3296                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
3297    }
3298}
3299
3300static void
3301e1000e_vm_state_change(void *opaque, bool running, RunState state)
3302{
3303    E1000ECore *core = opaque;
3304
3305    if (running) {
3306        trace_e1000e_vm_state_running();
3307        e1000e_intrmgr_resume(core);
3308        e1000e_autoneg_resume(core);
3309    } else {
3310        trace_e1000e_vm_state_stopped();
3311        e1000e_autoneg_pause(core);
3312        e1000e_intrmgr_pause(core);
3313    }
3314}
3315
3316void
3317e1000e_core_pci_realize(E1000ECore     *core,
3318                        const uint16_t *eeprom_templ,
3319                        uint32_t        eeprom_size,
3320                        const uint8_t  *macaddr)
3321{
3322    int i;
3323
3324    core->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
3325                                       e1000e_autoneg_timer, core);
3326    e1000e_intrmgr_pci_realize(core);
3327
3328    core->vmstate =
3329        qemu_add_vm_change_state_handler(e1000e_vm_state_change, core);
3330
3331    for (i = 0; i < E1000E_NUM_QUEUES; i++) {
3332        net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner,
3333                        E1000E_MAX_TX_FRAGS, core->has_vnet);
3334    }
3335
3336    net_rx_pkt_init(&core->rx_pkt, core->has_vnet);
3337
3338    e1000x_core_prepare_eeprom(core->eeprom,
3339                               eeprom_templ,
3340                               eeprom_size,
3341                               PCI_DEVICE_GET_CLASS(core->owner)->device_id,
3342                               macaddr);
3343    e1000e_update_rx_offloads(core);
3344}
3345
3346void
3347e1000e_core_pci_uninit(E1000ECore *core)
3348{
3349    int i;
3350
3351    timer_free(core->autoneg_timer);
3352
3353    e1000e_intrmgr_pci_unint(core);
3354
3355    qemu_del_vm_change_state_handler(core->vmstate);
3356
3357    for (i = 0; i < E1000E_NUM_QUEUES; i++) {
3358        net_tx_pkt_reset(core->tx[i].tx_pkt);
3359        net_tx_pkt_uninit(core->tx[i].tx_pkt);
3360    }
3361
3362    net_rx_pkt_uninit(core->rx_pkt);
3363}
3364
3365static const uint16_t
3366e1000e_phy_reg_init[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE] = {
3367    [0] = {
3368        [PHY_CTRL] =   MII_CR_SPEED_SELECT_MSB  |
3369                       MII_CR_FULL_DUPLEX       |
3370                       MII_CR_AUTO_NEG_EN,
3371
3372        [PHY_STATUS] = MII_SR_EXTENDED_CAPS     |
3373                       MII_SR_LINK_STATUS       |
3374                       MII_SR_AUTONEG_CAPS      |
3375                       MII_SR_PREAMBLE_SUPPRESS |
3376                       MII_SR_EXTENDED_STATUS   |
3377                       MII_SR_10T_HD_CAPS       |
3378                       MII_SR_10T_FD_CAPS       |
3379                       MII_SR_100X_HD_CAPS      |
3380                       MII_SR_100X_FD_CAPS,
3381
3382        [PHY_ID1]               = 0x141,
3383        [PHY_ID2]               = E1000_PHY_ID2_82574x,
3384        [PHY_AUTONEG_ADV]       = 0xde1,
3385        [PHY_LP_ABILITY]        = 0x7e0,
3386        [PHY_AUTONEG_EXP]       = BIT(2),
3387        [PHY_NEXT_PAGE_TX]      = BIT(0) | BIT(13),
3388        [PHY_1000T_CTRL]        = BIT(8) | BIT(9) | BIT(10) | BIT(11),
3389        [PHY_1000T_STATUS]      = 0x3c00,
3390        [PHY_EXT_STATUS]        = BIT(12) | BIT(13),
3391
3392        [PHY_COPPER_CTRL1]      = BIT(5) | BIT(6) | BIT(8) | BIT(9) |
3393                                  BIT(12) | BIT(13),
3394        [PHY_COPPER_STAT1]      = BIT(3) | BIT(10) | BIT(11) | BIT(13) | BIT(15)
3395    },
3396    [2] = {
3397        [PHY_MAC_CTRL1]         = BIT(3) | BIT(7),
3398        [PHY_MAC_CTRL2]         = BIT(1) | BIT(2) | BIT(6) | BIT(12)
3399    },
3400    [3] = {
3401        [PHY_LED_TIMER_CTRL]    = BIT(0) | BIT(2) | BIT(14)
3402    }
3403};
3404
3405static const uint32_t e1000e_mac_reg_init[] = {
3406    [PBA]           =     0x00140014,
3407    [LEDCTL]        =  BIT(1) | BIT(8) | BIT(9) | BIT(15) | BIT(17) | BIT(18),
3408    [EXTCNF_CTRL]   = BIT(3),
3409    [EEMNGCTL]      = BIT(31),
3410    [FLASHT]        = 0x2,
3411    [FLSWCTL]       = BIT(30) | BIT(31),
3412    [FLOL]          = BIT(0),
3413    [RXDCTL]        = BIT(16),
3414    [RXDCTL1]       = BIT(16),
3415    [TIPG]          = 0x8 | (0x8 << 10) | (0x6 << 20),
3416    [RXCFGL]        = 0x88F7,
3417    [RXUDP]         = 0x319,
3418    [CTRL]          = E1000_CTRL_FD | E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
3419                      E1000_CTRL_SPD_1000 | E1000_CTRL_SLU |
3420                      E1000_CTRL_ADVD3WUC,
3421    [STATUS]        =  E1000_STATUS_ASDV_1000 | E1000_STATUS_LU,
3422    [PSRCTL]        = (2 << E1000_PSRCTL_BSIZE0_SHIFT) |
3423                      (4 << E1000_PSRCTL_BSIZE1_SHIFT) |
3424                      (4 << E1000_PSRCTL_BSIZE2_SHIFT),
3425    [TARC0]         = 0x3 | E1000_TARC_ENABLE,
3426    [TARC1]         = 0x3 | E1000_TARC_ENABLE,
3427    [EECD]          = E1000_EECD_AUTO_RD | E1000_EECD_PRES,
3428    [EERD]          = E1000_EERW_DONE,
3429    [EEWR]          = E1000_EERW_DONE,
3430    [GCR]           = E1000_L0S_ADJUST |
3431                      E1000_L1_ENTRY_LATENCY_MSB |
3432                      E1000_L1_ENTRY_LATENCY_LSB,
3433    [TDFH]          = 0x600,
3434    [TDFT]          = 0x600,
3435    [TDFHS]         = 0x600,
3436    [TDFTS]         = 0x600,
3437    [POEMB]         = 0x30D,
3438    [PBS]           = 0x028,
3439    [MANC]          = E1000_MANC_DIS_IP_CHK_ARP,
3440    [FACTPS]        = E1000_FACTPS_LAN0_ON | 0x20000000,
3441    [SWSM]          = 1,
3442    [RXCSUM]        = E1000_RXCSUM_IPOFLD | E1000_RXCSUM_TUOFLD,
3443    [ITR]           = E1000E_MIN_XITR,
3444    [EITR...EITR + E1000E_MSIX_VEC_NUM - 1] = E1000E_MIN_XITR,
3445};
3446
3447void
3448e1000e_core_reset(E1000ECore *core)
3449{
3450    int i;
3451
3452    timer_del(core->autoneg_timer);
3453
3454    e1000e_intrmgr_reset(core);
3455
3456    memset(core->phy, 0, sizeof core->phy);
3457    memmove(core->phy, e1000e_phy_reg_init, sizeof e1000e_phy_reg_init);
3458    memset(core->mac, 0, sizeof core->mac);
3459    memmove(core->mac, e1000e_mac_reg_init, sizeof e1000e_mac_reg_init);
3460
3461    core->rxbuf_min_shift = 1 + E1000_RING_DESC_LEN_SHIFT;
3462
3463    if (qemu_get_queue(core->owner_nic)->link_down) {
3464        e1000e_link_down(core);
3465    }
3466
3467    e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);
3468
3469    for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
3470        net_tx_pkt_reset(core->tx[i].tx_pkt);
3471        memset(&core->tx[i].props, 0, sizeof(core->tx[i].props));
3472        core->tx[i].skip_cp = false;
3473    }
3474}
3475
3476void e1000e_core_pre_save(E1000ECore *core)
3477{
3478    int i;
3479    NetClientState *nc = qemu_get_queue(core->owner_nic);
3480
3481    /*
3482    * If link is down and auto-negotiation is supported and ongoing,
3483    * complete auto-negotiation immediately. This allows us to look
3484    * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
3485    */
3486    if (nc->link_down && e1000e_have_autoneg(core)) {
3487        core->phy[0][PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
3488        e1000e_update_flowctl_status(core);
3489    }
3490
3491    for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
3492        if (net_tx_pkt_has_fragments(core->tx[i].tx_pkt)) {
3493            core->tx[i].skip_cp = true;
3494        }
3495    }
3496}
3497
3498int
3499e1000e_core_post_load(E1000ECore *core)
3500{
3501    NetClientState *nc = qemu_get_queue(core->owner_nic);
3502
3503    /* nc.link_down can't be migrated, so infer link_down according
3504     * to link status bit in core.mac[STATUS].
3505     */
3506    nc->link_down = (core->mac[STATUS] & E1000_STATUS_LU) == 0;
3507
3508    return 0;
3509}
3510