qemu/hw/net/e1000.c
   1/*
   2 * QEMU e1000 emulation
   3 *
   4 * Software developer's manual:
   5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
   6 *
   7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
   8 * Copyright (c) 2008 Qumranet
   9 * Based on work done by:
  10 * Copyright (c) 2007 Dan Aloni
  11 * Copyright (c) 2004 Antony T Curtis
  12 *
  13 * This library is free software; you can redistribute it and/or
  14 * modify it under the terms of the GNU Lesser General Public
  15 * License as published by the Free Software Foundation; either
  16 * version 2.1 of the License, or (at your option) any later version.
  17 *
  18 * This library is distributed in the hope that it will be useful,
  19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 * Lesser General Public License for more details.
  22 *
  23 * You should have received a copy of the GNU Lesser General Public
  24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  25 */
  26
  27
  28#include "qemu/osdep.h"
  29#include "hw/pci/pci.h"
  30#include "hw/qdev-properties.h"
  31#include "migration/vmstate.h"
  32#include "net/net.h"
  33#include "net/checksum.h"
  34#include "sysemu/sysemu.h"
  35#include "sysemu/dma.h"
  36#include "qemu/iov.h"
  37#include "qemu/module.h"
  38#include "qemu/range.h"
  39
  40#include "e1000x_common.h"
  41#include "trace.h"
  42#include "qom/object.h"
  43
  44static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
  45
  46/* #define E1000_DEBUG */
  47
  48#ifdef E1000_DEBUG
  49enum {
  50    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
  51    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
  52    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
  53    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
  54};
  55#define DBGBIT(x)    (1<<DEBUG_##x)
  56static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
  57
  58#define DBGOUT(what, fmt, ...) do { \
  59    if (debugflags & DBGBIT(what)) \
  60        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
  61    } while (0)
  62#else
  63#define DBGOUT(what, fmt, ...) do {} while (0)
  64#endif
  65
  66#define IOPORT_SIZE       0x40
  67#define PNPMMIO_SIZE      0x20000
  68#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
  69
   70#define MAXIMUM_ETHERNET_HDR_LEN (14+4) /* Ethernet header (14) + 802.1Q VLAN tag (4) */
  71
  72/*
  73 * HW models:
  74 *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
  75 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
  76 *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
  77 *  Others never tested
  78 */
  79
  80struct E1000State_st {
  81    /*< private >*/
  82    PCIDevice parent_obj;
  83    /*< public >*/
  84
  85    NICState *nic;
  86    NICConf conf;
  87    MemoryRegion mmio;
  88    MemoryRegion io;
  89
  90    uint32_t mac_reg[0x8000];
  91    uint16_t phy_reg[0x20];
  92    uint16_t eeprom_data[64];
  93
  94    uint32_t rxbuf_size;
  95    uint32_t rxbuf_min_shift;
  96    struct e1000_tx {
  97        unsigned char header[256];
  98        unsigned char vlan_header[4];
  99        /* Fields vlan and data must not be reordered or separated. */
 100        unsigned char vlan[4];
 101        unsigned char data[0x10000];
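             /*
              * Editorial note: xmit_seg() transmits a VLAN-tagged frame
              * starting at tp->vlan with length tp->size + 4, relying on
              * the 4-byte vlan buffer sitting immediately before data in
              * memory; that is why the two fields must stay adjacent.
              */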
 102        uint16_t size;
 103        unsigned char vlan_needed;
 104        unsigned char sum_needed;
 105        bool cptse;
 106        e1000x_txd_props props;
 107        e1000x_txd_props tso_props;
 108        uint16_t tso_frames;
 109    } tx;
 110
 111    struct {
 112        uint32_t val_in;    /* shifted in from guest driver */
 113        uint16_t bitnum_in;
 114        uint16_t bitnum_out;
 115        uint16_t reading;
 116        uint32_t old_eecd;
 117    } eecd_state;
 118
 119    QEMUTimer *autoneg_timer;
 120
 121    QEMUTimer *mit_timer;      /* Mitigation timer. */
 122    bool mit_timer_on;         /* Mitigation timer is running. */
 123    bool mit_irq_level;        /* Tracks interrupt pin level. */
 124    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
 125
 126    QEMUTimer *flush_queue_timer;
 127
 128/* Compatibility flags for migration to/from qemu 1.3.0 and older */
 129#define E1000_FLAG_AUTONEG_BIT 0
 130#define E1000_FLAG_MIT_BIT 1
 131#define E1000_FLAG_MAC_BIT 2
 132#define E1000_FLAG_TSO_BIT 3
 133#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
 134#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
 135#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
 136#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
 137    uint32_t compat_flags;
 138    bool received_tx_tso;
 139    bool use_tso_for_migration;
 140    e1000x_txd_props mig_props;
 141};
 142typedef struct E1000State_st E1000State;
 143
 144#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
 145
 146struct E1000BaseClass {
 147    PCIDeviceClass parent_class;
 148    uint16_t phy_id2;
 149};
 150typedef struct E1000BaseClass E1000BaseClass;
 151
 152#define TYPE_E1000_BASE "e1000-base"
 153
 154DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
 155                     E1000, TYPE_E1000_BASE)
 156
 157
 158static void
 159e1000_link_up(E1000State *s)
 160{
 161    e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
 162
 163    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 164    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 165}
 166
 167static void
 168e1000_autoneg_done(E1000State *s)
 169{
 170    e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
 171
 172    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 173    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 174}
 175
 176static bool
 177have_autoneg(E1000State *s)
 178{
 179    return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
 180}
 181
 182static void
 183set_phy_ctrl(E1000State *s, int index, uint16_t val)
 184{
 185    /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
 186    s->phy_reg[PHY_CTRL] = val & ~(0x3f |
 187                                   MII_CR_RESET |
 188                                   MII_CR_RESTART_AUTO_NEG);
 189
 190    /*
 191     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
 192     * migrate during auto negotiation, after migration the link will be
 193     * down.
 194     */
 195    if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
 196        e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
 197    }
 198}
 199
 200static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
 201    [PHY_CTRL] = set_phy_ctrl,
 202};
 203
 204enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
 205
 206enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
 207static const char phy_regcap[0x20] = {
 208    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
 209    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
 210    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
 211    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
 212    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
 213    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
 214    [PHY_AUTONEG_EXP] = PHY_R,
 215};
 216
  217/* PHY_ID2 documented in 8254x_GBe_SDM.pdf, p. 250 */
 218static const uint16_t phy_reg_init[] = {
 219    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
 220                   MII_CR_FULL_DUPLEX |
 221                   MII_CR_AUTO_NEG_EN,
 222
 223    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
 224                   MII_SR_LINK_STATUS |   /* link initially up */
 225                   MII_SR_AUTONEG_CAPS |
 226                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
 227                   MII_SR_PREAMBLE_SUPPRESS |
 228                   MII_SR_EXTENDED_STATUS |
 229                   MII_SR_10T_HD_CAPS |
 230                   MII_SR_10T_FD_CAPS |
 231                   MII_SR_100X_HD_CAPS |
 232                   MII_SR_100X_FD_CAPS,
 233
 234    [PHY_ID1] = 0x141,
 235    /* [PHY_ID2] configured per DevId, from e1000_reset() */
 236    [PHY_AUTONEG_ADV] = 0xde1,
 237    [PHY_LP_ABILITY] = 0x1e0,
 238    [PHY_1000T_CTRL] = 0x0e00,
 239    [PHY_1000T_STATUS] = 0x3c00,
 240    [M88E1000_PHY_SPEC_CTRL] = 0x360,
 241    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
 242    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
 243};
 244
 245static const uint32_t mac_reg_init[] = {
 246    [PBA]     = 0x00100030,
 247    [LEDCTL]  = 0x602,
 248    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
 249                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
 250    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
 251                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
 252                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
 253                E1000_STATUS_LU,
 254    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
 255                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
 256                E1000_MANC_RMCP_EN,
 257};
 258
 259/* Helper function, *curr == 0 means the value is not set */
 260static inline void
 261mit_update_delay(uint32_t *curr, uint32_t value)
 262{
 263    if (value && (*curr == 0 || value < *curr)) {
 264        *curr = value;
 265    }
 266}
 267
 268static void
 269set_interrupt_cause(E1000State *s, int index, uint32_t val)
 270{
 271    PCIDevice *d = PCI_DEVICE(s);
 272    uint32_t pending_ints;
 273    uint32_t mit_delay;
 274
 275    s->mac_reg[ICR] = val;
 276
 277    /*
 278     * Make sure ICR and ICS registers have the same value.
 279     * The spec says that the ICS register is write-only.  However in practice,
 280     * on real hardware ICS is readable, and for reads it has the same value as
 281     * ICR (except that ICS does not have the clear on read behaviour of ICR).
 282     *
 283     * The VxWorks PRO/1000 driver uses this behaviour.
 284     */
 285    s->mac_reg[ICS] = val;
 286
 287    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
 288    if (!s->mit_irq_level && pending_ints) {
 289        /*
 290         * Here we detect a potential raising edge. We postpone raising the
 291         * interrupt line if we are inside the mitigation delay window
 292         * (s->mit_timer_on == 1).
 293         * We provide a partial implementation of interrupt mitigation,
 294         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
 295         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
 296         * RADV; relative timers based on TIDV and RDTR are not implemented.
 297         */
 298        if (s->mit_timer_on) {
 299            return;
 300        }
 301        if (chkflag(MIT)) {
 302            /* Compute the next mitigation delay according to pending
 303             * interrupts and the current values of RADV (provided
 304             * RDTR!=0), TADV and ITR.
 305             * Then rearm the timer.
 306             */
 307            mit_delay = 0;
 308            if (s->mit_ide &&
 309                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
 310                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
 311            }
 312            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
 313                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
 314            }
 315            mit_update_delay(&mit_delay, s->mac_reg[ITR]);
 316
 317            /*
 318             * According to e1000 SPEC, the Ethernet controller guarantees
 319             * a maximum observable interrupt rate of 7813 interrupts/sec.
 320             * Thus if mit_delay < 500 then the delay should be set to the
 321             * minimum delay possible which is 500.
 322             */
 323            mit_delay = (mit_delay < 500) ? 500 : mit_delay;
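                 /*
                  * Worked example (editorial, illustrative numbers):
                  * mit_delay is kept in 256 ns units (see the timer_mod()
                  * call below), so the floor of 500 corresponds to
                  * 500 * 256 ns = 128 us between interrupts, i.e. roughly
                  * 1e9 / 128000 ~= 7812 interrupts/sec.  RADV and TADV are
                  * specified in 1024 ns units, which is why they are
                  * scaled by 4 above before being merged into mit_delay.
                  */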
 324
 325            s->mit_timer_on = 1;
 326            timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 327                      mit_delay * 256);
 328            s->mit_ide = 0;
 329        }
 330    }
 331
 332    s->mit_irq_level = (pending_ints != 0);
 333    pci_set_irq(d, s->mit_irq_level);
 334}
 335
 336static void
 337e1000_mit_timer(void *opaque)
 338{
 339    E1000State *s = opaque;
 340
 341    s->mit_timer_on = 0;
 342    /* Call set_interrupt_cause to update the irq level (if necessary). */
 343    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
 344}
 345
 346static void
 347set_ics(E1000State *s, int index, uint32_t val)
 348{
 349    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
 350        s->mac_reg[IMS]);
 351    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
 352}
 353
 354static void
 355e1000_autoneg_timer(void *opaque)
 356{
 357    E1000State *s = opaque;
 358    if (!qemu_get_queue(s->nic)->link_down) {
 359        e1000_autoneg_done(s);
 360        set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
 361    }
 362}
 363
 364static void e1000_reset(void *opaque)
 365{
 366    E1000State *d = opaque;
 367    E1000BaseClass *edc = E1000_GET_CLASS(d);
 368    uint8_t *macaddr = d->conf.macaddr.a;
 369
 370    timer_del(d->autoneg_timer);
 371    timer_del(d->mit_timer);
 372    timer_del(d->flush_queue_timer);
 373    d->mit_timer_on = 0;
 374    d->mit_irq_level = 0;
 375    d->mit_ide = 0;
 376    memset(d->phy_reg, 0, sizeof d->phy_reg);
 377    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
 378    d->phy_reg[PHY_ID2] = edc->phy_id2;
 379    memset(d->mac_reg, 0, sizeof d->mac_reg);
 380    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
 381    d->rxbuf_min_shift = 1;
 382    memset(&d->tx, 0, sizeof d->tx);
 383
 384    if (qemu_get_queue(d->nic)->link_down) {
 385        e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
 386    }
 387
 388    e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
 389}
 390
 391static void
 392set_ctrl(E1000State *s, int index, uint32_t val)
 393{
 394    /* RST is self clearing */
 395    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
 396}
 397
 398static void
 399e1000_flush_queue_timer(void *opaque)
 400{
 401    E1000State *s = opaque;
 402
 403    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 404}
 405
 406static void
 407set_rx_control(E1000State *s, int index, uint32_t val)
 408{
 409    s->mac_reg[RCTL] = val;
 410    s->rxbuf_size = e1000x_rxbufsize(val);
 411    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
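         /*
          * Editorial note: rxbuf_min_shift turns the RDMTS field into a
          * right shift of RDLEN in e1000_receive_iov(); e.g. RDMTS = 0
          * gives a shift of 1, so RXDMT0 is raised once the free part of
          * the RX ring drops to RDLEN / 2 bytes, while RDMTS = 1 and 2
          * give 1/4 and 1/8 respectively.
          */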
 412    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
 413           s->mac_reg[RCTL]);
 414    timer_mod(s->flush_queue_timer,
 415              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
 416}
 417
 418static void
 419set_mdic(E1000State *s, int index, uint32_t val)
 420{
 421    uint32_t data = val & E1000_MDIC_DATA_MASK;
 422    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
 423
  424    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #: only PHY address 1 responds
 425        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
 426    else if (val & E1000_MDIC_OP_READ) {
 427        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
 428        if (!(phy_regcap[addr] & PHY_R)) {
 429            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
 430            val |= E1000_MDIC_ERROR;
 431        } else
 432            val = (val ^ data) | s->phy_reg[addr];
 433    } else if (val & E1000_MDIC_OP_WRITE) {
 434        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
 435        if (!(phy_regcap[addr] & PHY_W)) {
 436            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
 437            val |= E1000_MDIC_ERROR;
 438        } else {
 439            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
 440                phyreg_writeops[addr](s, index, data);
 441            } else {
 442                s->phy_reg[addr] = data;
 443            }
 444        }
 445    }
 446    s->mac_reg[MDIC] = val | E1000_MDIC_READY;
 447
 448    if (val & E1000_MDIC_INT_EN) {
 449        set_ics(s, 0, E1000_ICR_MDAC);
 450    }
 451}
 452
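     /*
      * Editorial note on the EECD bit-bang interface below (a reading of
      * the code, not quoted from the datasheet): the guest shifts a
      * Microwire-style command into EECD one bit per rising SK edge.
      * After 9 bits (start bit, 2-bit opcode, 6-bit word address) have
      * been clocked in, set_eecd() compares the opcode against
      * EEPROM_READ_OPCODE_MICROWIRE and, for a read, positions bitnum_out
      * one step before the addressed 16-bit word so that get_eecd() shifts
      * the word out MSB first on the following clock edges.
      */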
 453static uint32_t
 454get_eecd(E1000State *s, int index)
 455{
 456    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
 457
 458    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
 459           s->eecd_state.bitnum_out, s->eecd_state.reading);
 460    if (!s->eecd_state.reading ||
 461        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
 462          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
 463        ret |= E1000_EECD_DO;
 464    return ret;
 465}
 466
 467static void
 468set_eecd(E1000State *s, int index, uint32_t val)
 469{
 470    uint32_t oldval = s->eecd_state.old_eecd;
 471
 472    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
 473            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
 474    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
 475        return;
 476    }
 477    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
 478        s->eecd_state.val_in = 0;
 479        s->eecd_state.bitnum_in = 0;
 480        s->eecd_state.bitnum_out = 0;
 481        s->eecd_state.reading = 0;
 482    }
 483    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
 484        return;
 485    }
 486    if (!(E1000_EECD_SK & val)) {               /* falling edge */
 487        s->eecd_state.bitnum_out++;
 488        return;
 489    }
 490    s->eecd_state.val_in <<= 1;
 491    if (val & E1000_EECD_DI)
 492        s->eecd_state.val_in |= 1;
 493    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
 494        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
 495        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
 496            EEPROM_READ_OPCODE_MICROWIRE);
 497    }
 498    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
 499           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
 500           s->eecd_state.reading);
 501}
 502
 503static uint32_t
 504flash_eerd_read(E1000State *s, int x)
 505{
 506    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
 507
 508    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
 509        return (s->mac_reg[EERD]);
 510
 511    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
 512        return (E1000_EEPROM_RW_REG_DONE | r);
 513
 514    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
 515           E1000_EEPROM_RW_REG_DONE | r);
 516}
 517
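     /*
      * Editorial note: putsum() computes a 16-bit ones'-complement
      * Internet checksum over data[css .. n) (optionally capped by a
      * non-zero cse) and stores it big-endian at offset sloc, which is
      * how the IP and TCP/UDP offload checksums requested by a transmit
      * descriptor are filled in.
      */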
 518static void
 519putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
 520{
 521    uint32_t sum;
 522
 523    if (cse && cse < n)
 524        n = cse + 1;
 525    if (sloc < n-1) {
 526        sum = net_checksum_add(n-css, data+css);
 527        stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
 528    }
 529}
 530
 531static inline void
 532inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
 533{
 534    if (!memcmp(arr, bcast, sizeof bcast)) {
 535        e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
 536    } else if (arr[0] & 1) {
 537        e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
 538    }
 539}
 540
 541static void
 542e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
 543{
 544    static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
 545                                    PTC1023, PTC1522 };
 546
 547    NetClientState *nc = qemu_get_queue(s->nic);
 548    if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
 549        nc->info->receive(nc, buf, size);
 550    } else {
 551        qemu_send_packet(nc, buf, size);
 552    }
 553    inc_tx_bcast_or_mcast_count(s, buf);
 554    e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
 555}
 556
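     /*
      * Editorial note: xmit_seg() emits one frame of the current
      * transmission.  For TSO (tp->cptse) it patches the per-segment IP
      * total length / identification, advances the TCP sequence number,
      * clears PSH/FIN on all but the last segment and folds the segment
      * length into the pseudo-header checksum; it then inserts the
      * requested IP and TCP/UDP checksums, re-adds the 802.1Q tag when
      * vlan_needed is set, and hands the frame to e1000_send_packet().
      */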
 557static void
 558xmit_seg(E1000State *s)
 559{
 560    uint16_t len;
 561    unsigned int frames = s->tx.tso_frames, css, sofar;
 562    struct e1000_tx *tp = &s->tx;
 563    struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
 564
 565    if (tp->cptse) {
 566        css = props->ipcss;
 567        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
 568               frames, tp->size, css);
 569        if (props->ip) {    /* IPv4 */
 570            stw_be_p(tp->data+css+2, tp->size - css);
 571            stw_be_p(tp->data+css+4,
 572                     lduw_be_p(tp->data + css + 4) + frames);
 573        } else {         /* IPv6 */
 574            stw_be_p(tp->data+css+4, tp->size - css);
 575        }
 576        css = props->tucss;
 577        len = tp->size - css;
 578        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
 579        if (props->tcp) {
 580            sofar = frames * props->mss;
 581            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
 582            if (props->paylen - sofar > props->mss) {
 583                tp->data[css + 13] &= ~9;    /* PSH, FIN */
 584            } else if (frames) {
 585                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
 586            }
 587        } else {    /* UDP */
 588            stw_be_p(tp->data+css+4, len);
 589        }
 590        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
 591            unsigned int phsum;
 592            // add pseudo-header length before checksum calculation
 593            void *sp = tp->data + props->tucso;
 594
 595            phsum = lduw_be_p(sp) + len;
 596            phsum = (phsum >> 16) + (phsum & 0xffff);
 597            stw_be_p(sp, phsum);
 598        }
 599        tp->tso_frames++;
 600    }
 601
 602    if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
 603        putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
 604    }
 605    if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
 606        putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
 607    }
 608    if (tp->vlan_needed) {
 609        memmove(tp->vlan, tp->data, 4);
 610        memmove(tp->data, tp->data + 4, 8);
 611        memcpy(tp->data + 8, tp->vlan_header, 4);
 612        e1000_send_packet(s, tp->vlan, tp->size + 4);
 613    } else {
 614        e1000_send_packet(s, tp->data, tp->size);
 615    }
 616
 617    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
 618    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
 619    s->mac_reg[GPTC] = s->mac_reg[TPT];
 620    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
 621    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
 622}
 623
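     /*
      * Editorial note: process_tx_desc() handles the three descriptor
      * flavours: context descriptors only latch offload parameters into
      * tp->props or tp->tso_props, while data and legacy descriptors DMA
      * their buffer into tp->data.  For TSO the data is accumulated up to
      * msh = hdr_len + mss bytes and flushed one segment at a time; e.g.
      * with hdr_len = 54 and mss = 1460 (illustrative values) every
      * xmit_seg() call sends a 1514-byte frame and the saved header is
      * copied back in to start the next segment.
      */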
 624static void
 625process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
 626{
 627    PCIDevice *d = PCI_DEVICE(s);
 628    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
 629    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
 630    unsigned int split_size = txd_lower & 0xffff, bytes, sz;
 631    unsigned int msh = 0xfffff;
 632    uint64_t addr;
 633    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
 634    struct e1000_tx *tp = &s->tx;
 635
 636    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
 637    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
 638        if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
 639            e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
 640            s->use_tso_for_migration = 1;
 641            tp->tso_frames = 0;
 642        } else {
 643            e1000x_read_tx_ctx_descr(xp, &tp->props);
 644            s->use_tso_for_migration = 0;
 645        }
 646        return;
 647    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
 648        // data descriptor
 649        if (tp->size == 0) {
 650            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
 651        }
 652        tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
 653    } else {
 654        // legacy descriptor
 655        tp->cptse = 0;
 656    }
 657
 658    if (e1000x_vlan_enabled(s->mac_reg) &&
 659        e1000x_is_vlan_txd(txd_lower) &&
 660        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
 661        tp->vlan_needed = 1;
 662        stw_be_p(tp->vlan_header,
 663                      le16_to_cpu(s->mac_reg[VET]));
 664        stw_be_p(tp->vlan_header + 2,
 665                      le16_to_cpu(dp->upper.fields.special));
 666    }
 667
 668    addr = le64_to_cpu(dp->buffer_addr);
 669    if (tp->cptse) {
 670        msh = tp->tso_props.hdr_len + tp->tso_props.mss;
 671        do {
 672            bytes = split_size;
 673            if (tp->size + bytes > msh)
 674                bytes = msh - tp->size;
 675
 676            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
 677            pci_dma_read(d, addr, tp->data + tp->size, bytes);
 678            sz = tp->size + bytes;
 679            if (sz >= tp->tso_props.hdr_len
 680                && tp->size < tp->tso_props.hdr_len) {
 681                memmove(tp->header, tp->data, tp->tso_props.hdr_len);
 682            }
 683            tp->size = sz;
 684            addr += bytes;
 685            if (sz == msh) {
 686                xmit_seg(s);
 687                memmove(tp->data, tp->header, tp->tso_props.hdr_len);
 688                tp->size = tp->tso_props.hdr_len;
 689            }
 690            split_size -= bytes;
 691        } while (bytes && split_size);
 692    } else {
 693        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
 694        pci_dma_read(d, addr, tp->data + tp->size, split_size);
 695        tp->size += split_size;
 696    }
 697
 698    if (!(txd_lower & E1000_TXD_CMD_EOP))
 699        return;
 700    if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
 701        xmit_seg(s);
 702    }
 703    tp->tso_frames = 0;
 704    tp->sum_needed = 0;
 705    tp->vlan_needed = 0;
 706    tp->size = 0;
 707    tp->cptse = 0;
 708}
 709
 710static uint32_t
 711txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
 712{
 713    PCIDevice *d = PCI_DEVICE(s);
 714    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
 715
 716    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
 717        return 0;
 718    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
 719                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
 720    dp->upper.data = cpu_to_le32(txd_upper);
 721    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
 722                  &dp->upper, sizeof(dp->upper));
 723    return E1000_ICR_TXDW;
 724}
 725
 726static uint64_t tx_desc_base(E1000State *s)
 727{
 728    uint64_t bah = s->mac_reg[TDBAH];
 729    uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
 730
 731    return (bah << 32) + bal;
 732}
 733
 734static void
 735start_xmit(E1000State *s)
 736{
 737    PCIDevice *d = PCI_DEVICE(s);
 738    dma_addr_t base;
 739    struct e1000_tx_desc desc;
 740    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
 741
 742    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
 743        DBGOUT(TX, "tx disabled\n");
 744        return;
 745    }
 746
 747    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
 748        base = tx_desc_base(s) +
 749               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
 750        pci_dma_read(d, base, &desc, sizeof(desc));
 751
 752        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
 753               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
 754               desc.upper.data);
 755
 756        process_tx_desc(s, &desc);
 757        cause |= txdesc_writeback(s, base, &desc);
 758
 759        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
 760            s->mac_reg[TDH] = 0;
 761        /*
 762         * the following could happen only if guest sw assigns
 763         * bogus values to TDT/TDLEN.
 764         * there's nothing too intelligent we could do about this.
 765         */
 766        if (s->mac_reg[TDH] == tdh_start ||
 767            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
 768            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
 769                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
 770            break;
 771        }
 772    }
 773    set_ics(s, 0, cause);
 774}
 775
 776static int
 777receive_filter(E1000State *s, const uint8_t *buf, int size)
 778{
 779    uint32_t rctl = s->mac_reg[RCTL];
 780    int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
 781
 782    if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
 783        e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
 784        uint16_t vid = lduw_be_p(buf + 14);
 785        uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
 786                                 ((vid >> 5) & 0x7f));
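             /*
              * Editorial example: each VFTA word holds 32 VLAN bits, so
              * VLAN ID 100 selects word 100 >> 5 = 3, bit 100 & 0x1f = 4.
              */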
 787        if ((vfta & (1 << (vid & 0x1f))) == 0)
 788            return 0;
 789    }
 790
 791    if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
 792        return 1;
 793    }
 794
 795    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
 796        e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
 797        return 1;
 798    }
 799
 800    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
 801        e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
 802        return 1;
 803    }
 804
 805    return e1000x_rx_group_filter(s->mac_reg, buf);
 806}
 807
 808static void
 809e1000_set_link_status(NetClientState *nc)
 810{
 811    E1000State *s = qemu_get_nic_opaque(nc);
 812    uint32_t old_status = s->mac_reg[STATUS];
 813
 814    if (nc->link_down) {
 815        e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
 816    } else {
 817        if (have_autoneg(s) &&
 818            !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
 819            e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
 820        } else {
 821            e1000_link_up(s);
 822        }
 823    }
 824
 825    if (s->mac_reg[STATUS] != old_status)
 826        set_ics(s, 0, E1000_ICR_LSC);
 827}
 828
 829static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
 830{
 831    int bufs;
 832    /* Fast-path short packets */
 833    if (total_size <= s->rxbuf_size) {
 834        return s->mac_reg[RDH] != s->mac_reg[RDT];
 835    }
 836    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
 837        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
 838    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
 839        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
 840            s->mac_reg[RDT] - s->mac_reg[RDH];
 841    } else {
 842        return false;
 843    }
 844    return total_size <= bufs * s->rxbuf_size;
 845}
 846
 847static bool
 848e1000_can_receive(NetClientState *nc)
 849{
 850    E1000State *s = qemu_get_nic_opaque(nc);
 851
 852    return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
 853        e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
 854}
 855
 856static uint64_t rx_desc_base(E1000State *s)
 857{
 858    uint64_t bah = s->mac_reg[RDBAH];
 859    uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
 860
 861    return (bah << 32) + bal;
 862}
 863
 864static void
 865e1000_receiver_overrun(E1000State *s, size_t size)
 866{
 867    trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
 868    e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
 869    e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
 870    set_ics(s, 0, E1000_ICS_RXO);
 871}
 872
 873static ssize_t
 874e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
 875{
 876    E1000State *s = qemu_get_nic_opaque(nc);
 877    PCIDevice *d = PCI_DEVICE(s);
 878    struct e1000_rx_desc desc;
 879    dma_addr_t base;
 880    unsigned int n, rdt;
 881    uint32_t rdh_start;
 882    uint16_t vlan_special = 0;
 883    uint8_t vlan_status = 0;
 884    uint8_t min_buf[MIN_BUF_SIZE];
 885    struct iovec min_iov;
 886    uint8_t *filter_buf = iov->iov_base;
 887    size_t size = iov_size(iov, iovcnt);
 888    size_t iov_ofs = 0;
 889    size_t desc_offset;
 890    size_t desc_size;
 891    size_t total_size;
 892
 893    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
 894        return -1;
 895    }
 896
 897    if (timer_pending(s->flush_queue_timer)) {
 898        return 0;
 899    }
 900
 901    /* Pad to minimum Ethernet frame length */
 902    if (size < sizeof(min_buf)) {
 903        iov_to_buf(iov, iovcnt, 0, min_buf, size);
 904        memset(&min_buf[size], 0, sizeof(min_buf) - size);
 905        min_iov.iov_base = filter_buf = min_buf;
 906        min_iov.iov_len = size = sizeof(min_buf);
 907        iovcnt = 1;
 908        iov = &min_iov;
 909    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
 910        /* This is very unlikely, but may happen. */
 911        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
 912        filter_buf = min_buf;
 913    }
 914
 915    /* Discard oversized packets if !LPE and !SBP. */
 916    if (e1000x_is_oversized(s->mac_reg, size)) {
 917        return size;
 918    }
 919
 920    if (!receive_filter(s, filter_buf, size)) {
 921        return size;
 922    }
 923
 924    if (e1000x_vlan_enabled(s->mac_reg) &&
 925        e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
 926        vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
 927        iov_ofs = 4;
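             /*
              * Editorial note: the 4-byte 802.1Q tag occupies offsets
              * 12..15 and its TCI was captured into vlan_special above.
              * Copying the 12-byte destination/source MAC pair forward by
              * 4 bytes and consuming the frame from offset 4 strips the
              * tag, leaving DA, SA, the original EtherType and the payload
              * contiguous.
              */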
 928        if (filter_buf == iov->iov_base) {
 929            memmove(filter_buf + 4, filter_buf, 12);
 930        } else {
 931            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
 932            while (iov->iov_len <= iov_ofs) {
 933                iov_ofs -= iov->iov_len;
 934                iov++;
 935            }
 936        }
 937        vlan_status = E1000_RXD_STAT_VP;
 938        size -= 4;
 939    }
 940
 941    rdh_start = s->mac_reg[RDH];
 942    desc_offset = 0;
 943    total_size = size + e1000x_fcs_len(s->mac_reg);
 944    if (!e1000_has_rxbufs(s, total_size)) {
 945        e1000_receiver_overrun(s, total_size);
 946        return -1;
 947    }
 948    do {
 949        desc_size = total_size - desc_offset;
 950        if (desc_size > s->rxbuf_size) {
 951            desc_size = s->rxbuf_size;
 952        }
 953        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
 954        pci_dma_read(d, base, &desc, sizeof(desc));
 955        desc.special = vlan_special;
 956        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
 957        if (desc.buffer_addr) {
 958            if (desc_offset < size) {
 959                size_t iov_copy;
 960                hwaddr ba = le64_to_cpu(desc.buffer_addr);
 961                size_t copy_size = size - desc_offset;
 962                if (copy_size > s->rxbuf_size) {
 963                    copy_size = s->rxbuf_size;
 964                }
 965                do {
 966                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
 967                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
 968                    copy_size -= iov_copy;
 969                    ba += iov_copy;
 970                    iov_ofs += iov_copy;
 971                    if (iov_ofs == iov->iov_len) {
 972                        iov++;
 973                        iov_ofs = 0;
 974                    }
 975                } while (copy_size);
 976            }
 977            desc_offset += desc_size;
 978            desc.length = cpu_to_le16(desc_size);
 979            if (desc_offset >= total_size) {
 980                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
 981            } else {
 982                /* Guest zeroing out status is not a hardware requirement.
 983                   Clear EOP in case guest didn't do it. */
 984                desc.status &= ~E1000_RXD_STAT_EOP;
 985            }
 986        } else { // as per intel docs; skip descriptors with null buf addr
 987            DBGOUT(RX, "Null RX descriptor!!\n");
 988        }
 989        pci_dma_write(d, base, &desc, sizeof(desc));
 990
 991        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
 992            s->mac_reg[RDH] = 0;
 993        /* see comment in start_xmit; same here */
 994        if (s->mac_reg[RDH] == rdh_start ||
 995            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
 996            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
 997                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
 998            e1000_receiver_overrun(s, total_size);
 999            return -1;
1000        }
1001    } while (desc_offset < total_size);
1002
1003    e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
1004
1005    n = E1000_ICS_RXT0;
1006    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1007        rdt += s->mac_reg[RDLEN] / sizeof(desc);
1008    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1009        s->rxbuf_min_shift)
1010        n |= E1000_ICS_RXDMT0;
1011
1012    set_ics(s, 0, n);
1013
1014    return size;
1015}
1016
1017static ssize_t
1018e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1019{
1020    const struct iovec iov = {
1021        .iov_base = (uint8_t *)buf,
1022        .iov_len = size
1023    };
1024
1025    return e1000_receive_iov(nc, &iov, 1);
1026}
1027
1028static uint32_t
1029mac_readreg(E1000State *s, int index)
1030{
1031    return s->mac_reg[index];
1032}
1033
1034static uint32_t
1035mac_low4_read(E1000State *s, int index)
1036{
1037    return s->mac_reg[index] & 0xf;
1038}
1039
1040static uint32_t
1041mac_low11_read(E1000State *s, int index)
1042{
1043    return s->mac_reg[index] & 0x7ff;
1044}
1045
1046static uint32_t
1047mac_low13_read(E1000State *s, int index)
1048{
1049    return s->mac_reg[index] & 0x1fff;
1050}
1051
1052static uint32_t
1053mac_low16_read(E1000State *s, int index)
1054{
1055    return s->mac_reg[index] & 0xffff;
1056}
1057
1058static uint32_t
1059mac_icr_read(E1000State *s, int index)
1060{
1061    uint32_t ret = s->mac_reg[ICR];
1062
1063    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1064    set_interrupt_cause(s, 0, 0);
1065    return ret;
1066}
1067
1068static uint32_t
1069mac_read_clr4(E1000State *s, int index)
1070{
1071    uint32_t ret = s->mac_reg[index];
1072
1073    s->mac_reg[index] = 0;
1074    return ret;
1075}
1076
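     /*
      * Editorial note: mac_read_clr8() serves the high half of a 64-bit
      * statistics counter pair; reading it returns the high register and
      * clears both it (index) and the adjacent low register (index - 1),
      * as used for TOTH/TORH/GOTCH/GORCH in macreg_readops below.
      */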
1077static uint32_t
1078mac_read_clr8(E1000State *s, int index)
1079{
1080    uint32_t ret = s->mac_reg[index];
1081
1082    s->mac_reg[index] = 0;
1083    s->mac_reg[index-1] = 0;
1084    return ret;
1085}
1086
1087static void
1088mac_writereg(E1000State *s, int index, uint32_t val)
1089{
1090    uint32_t macaddr[2];
1091
1092    s->mac_reg[index] = val;
1093
1094    if (index == RA + 1) {
1095        macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1096        macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1097        qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1098    }
1099}
1100
1101static void
1102set_rdt(E1000State *s, int index, uint32_t val)
1103{
1104    s->mac_reg[index] = val & 0xffff;
1105    if (e1000_has_rxbufs(s, 1)) {
1106        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1107    }
1108}
1109
1110static void
1111set_16bit(E1000State *s, int index, uint32_t val)
1112{
1113    s->mac_reg[index] = val & 0xffff;
1114}
1115
1116static void
1117set_dlen(E1000State *s, int index, uint32_t val)
1118{
1119    s->mac_reg[index] = val & 0xfff80;
1120}
1121
1122static void
1123set_tctl(E1000State *s, int index, uint32_t val)
1124{
1125    s->mac_reg[index] = val;
1126    s->mac_reg[TDT] &= 0xffff;
1127    start_xmit(s);
1128}
1129
1130static void
1131set_icr(E1000State *s, int index, uint32_t val)
1132{
1133    DBGOUT(INTERRUPT, "set_icr %x\n", val);
1134    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1135}
1136
1137static void
1138set_imc(E1000State *s, int index, uint32_t val)
1139{
1140    s->mac_reg[IMS] &= ~val;
1141    set_ics(s, 0, 0);
1142}
1143
1144static void
1145set_ims(E1000State *s, int index, uint32_t val)
1146{
1147    s->mac_reg[IMS] |= val;
1148    set_ics(s, 0, 0);
1149}
1150
1151#define getreg(x)    [x] = mac_readreg
1152typedef uint32_t (*readops)(E1000State *, int);
1153static const readops macreg_readops[] = {
1154    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1155    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1156    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1157    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1158    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1159    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1160    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1161    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1162    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1163    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1164    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
1165    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1166    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1167    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
1168    getreg(GOTCL),
1169
1170    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1171    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
1172    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
1173    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
1174    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
1175    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
1176    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
1177    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
1178    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1179    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1180    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
1181    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
1182    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
1183    [MPTC]    = mac_read_clr4,
1184    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1185    [EERD]    = flash_eerd_read,
1186    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
1187    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
1188    [RDFPC]   = mac_low13_read,
1189    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
1190    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
1191    [TDFPC]   = mac_low13_read,
1192    [AIT]     = mac_low16_read,
1193
1194    [CRCERRS ... MPC]   = &mac_readreg,
1195    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
1196    [FFLT ... FFLT+6]   = &mac_low11_read,
1197    [RA ... RA+31]      = &mac_readreg,
1198    [WUPM ... WUPM+31]  = &mac_readreg,
1199    [MTA ... MTA+127]   = &mac_readreg,
1200    [VFTA ... VFTA+127] = &mac_readreg,
1201    [FFMT ... FFMT+254] = &mac_low4_read,
1202    [FFVT ... FFVT+254] = &mac_readreg,
1203    [PBM ... PBM+16383] = &mac_readreg,
1204};
1205enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1206
1207#define putreg(x)    [x] = mac_writereg
1208typedef void (*writeops)(E1000State *, int, uint32_t);
1209static const writeops macreg_writeops[] = {
1210    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1211    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1212    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1213    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
1214    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
1215    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
1216    putreg(WUS),      putreg(AIT),
1217
1218    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
1219    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
1220    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
1221    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
1222    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
1223    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
1224    [ITR]    = set_16bit,
1225
1226    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
1227    [FFLT ... FFLT+6]   = &mac_writereg,
1228    [RA ... RA+31]      = &mac_writereg,
1229    [WUPM ... WUPM+31]  = &mac_writereg,
1230    [MTA ... MTA+127]   = &mac_writereg,
1231    [VFTA ... VFTA+127] = &mac_writereg,
1232    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
1233    [PBM ... PBM+16383] = &mac_writereg,
1234};
1235
1236enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1237
1238enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1239
1240#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
1241/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1242 * f - flag bits (up to 6 possible flags)
1243 * n - flag needed
 1244 * p - partially implemented */
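     /* Editorial example: markflag(MIT) stores E1000_FLAG_MIT in the flag
      * bits together with MAC_ACCESS_FLAG_NEEDED; e1000_mmio_read()/write()
      * shift the table entry right by 2 and AND it with s->compat_flags, so
      * the register is only reachable while that compatibility flag is set. */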
1245static const uint8_t mac_reg_access[0x8000] = {
1246    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
1247    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),
1248
1249    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
1250    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
1251    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
1252    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
1253    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
1254    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
1255    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
1256    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
1257    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
1258    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
1259    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
1260    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
1261    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
1262    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
1263    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
1264    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
1265    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
1266    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
1267    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
1268    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
1269    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
1270    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
1271    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
1272    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
1273    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
1274    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
1275    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
1276    [BPTC]    = markflag(MAC),
1277
1278    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1279    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1280    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1281    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1282    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1283    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1284    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1285    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1286    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1287    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1288    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
1289};
1290
1291static void
1292e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1293                 unsigned size)
1294{
1295    E1000State *s = opaque;
1296    unsigned int index = (addr & 0x1ffff) >> 2;
1297
1298    if (index < NWRITEOPS && macreg_writeops[index]) {
1299        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1300            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1301            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1302                DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1303                       "It is not fully implemented.\n", index<<2);
1304            }
1305            macreg_writeops[index](s, index, val);
1306        } else {    /* "flag needed" bit is set, but the flag is not active */
1307            DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1308                   index<<2);
1309        }
1310    } else if (index < NREADOPS && macreg_readops[index]) {
1311        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1312               index<<2, val);
1313    } else {
1314        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1315               index<<2, val);
1316    }
1317}
1318
1319static uint64_t
1320e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1321{
1322    E1000State *s = opaque;
1323    unsigned int index = (addr & 0x1ffff) >> 2;
1324
1325    if (index < NREADOPS && macreg_readops[index]) {
1326        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1327            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1328            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1329                DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1330                       "It is not fully implemented.\n", index<<2);
1331            }
1332            return macreg_readops[index](s, index);
1333        } else {    /* "flag needed" bit is set, but the flag is not active */
1334            DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1335                   index<<2);
1336        }
1337    } else {
1338        DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1339    }
1340    return 0;
1341}
1342
1343static const MemoryRegionOps e1000_mmio_ops = {
1344    .read = e1000_mmio_read,
1345    .write = e1000_mmio_write,
1346    .endianness = DEVICE_LITTLE_ENDIAN,
1347    .impl = {
1348        .min_access_size = 4,
1349        .max_access_size = 4,
1350    },
1351};
1352
1353static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1354                              unsigned size)
1355{
1356    E1000State *s = opaque;
1357
1358    (void)s;
1359    return 0;
1360}
1361
1362static void e1000_io_write(void *opaque, hwaddr addr,
1363                           uint64_t val, unsigned size)
1364{
1365    E1000State *s = opaque;
1366
1367    (void)s;
1368}
1369
1370static const MemoryRegionOps e1000_io_ops = {
1371    .read = e1000_io_read,
1372    .write = e1000_io_write,
1373    .endianness = DEVICE_LITTLE_ENDIAN,
1374};
1375
1376static bool is_version_1(void *opaque, int version_id)
1377{
1378    return version_id == 1;
1379}
1380
1381static int e1000_pre_save(void *opaque)
1382{
1383    E1000State *s = opaque;
1384    NetClientState *nc = qemu_get_queue(s->nic);
1385
1386    /*
1387     * If link is down and auto-negotiation is supported and ongoing,
1388     * complete auto-negotiation immediately. This allows us to look
1389     * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1390     */
1391    if (nc->link_down && have_autoneg(s)) {
1392        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1393    }
1394
1395    /* Decide which set of props to migrate in the main structure */
1396    if (chkflag(TSO) || !s->use_tso_for_migration) {
1397        /* Either we're migrating with the extra subsection, in which
1398         * case the mig_props is always 'props' OR
1399         * we've not got the subsection, but 'props' was the last
1400         * updated.
1401         */
1402        s->mig_props = s->tx.props;
1403    } else {
1404        /* We're not using the subsection, and 'tso_props' was
1405         * the last updated.
1406         */
1407        s->mig_props = s->tx.tso_props;
1408    }
1409    return 0;
1410}
1411
1412static int e1000_post_load(void *opaque, int version_id)
1413{
1414    E1000State *s = opaque;
1415    NetClientState *nc = qemu_get_queue(s->nic);
1416
1417    if (!chkflag(MIT)) {
1418        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1419            s->mac_reg[TADV] = 0;
1420        s->mit_irq_level = false;
1421    }
1422    s->mit_ide = 0;
1423    s->mit_timer_on = true;
1424    timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
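         /*
          * Editorial note: arming mit_timer to fire almost immediately
          * makes e1000_mit_timer() run shortly after the load completes,
          * which clears mit_timer_on and re-evaluates the interrupt line
          * from the migrated ICR/IMS values.
          */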
1425
1426    /* nc.link_down can't be migrated, so infer link_down according
1427     * to link status bit in mac_reg[STATUS].
1428     * Alternatively, restart link negotiation if it was in progress. */
1429    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1430
1431    if (have_autoneg(s) &&
1432        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1433        nc->link_down = false;
1434        timer_mod(s->autoneg_timer,
1435                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1436    }
1437
1438    s->tx.props = s->mig_props;
1439    if (!s->received_tx_tso) {
1440        /* We received only one set of offload data (tx.props)
1441         * and haven't got tx.tso_props.  The best we can do
1442         * is dupe the data.
1443         */
1444        s->tx.tso_props = s->mig_props;
1445    }
1446    return 0;
1447}
1448
1449static int e1000_tx_tso_post_load(void *opaque, int version_id)
1450{
1451    E1000State *s = opaque;
1452    s->received_tx_tso = true;
1453    return 0;
1454}
1455
1456static bool e1000_mit_state_needed(void *opaque)
1457{
1458    E1000State *s = opaque;
1459
1460    return chkflag(MIT);
1461}
1462
1463static bool e1000_full_mac_needed(void *opaque)
1464{
1465    E1000State *s = opaque;
1466
1467    return chkflag(MAC);
1468}
1469
1470static bool e1000_tso_state_needed(void *opaque)
1471{
1472    E1000State *s = opaque;
1473
1474    return chkflag(TSO);
1475}
1476
1477static const VMStateDescription vmstate_e1000_mit_state = {
1478    .name = "e1000/mit_state",
1479    .version_id = 1,
1480    .minimum_version_id = 1,
1481    .needed = e1000_mit_state_needed,
1482    .fields = (VMStateField[]) {
1483        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1484        VMSTATE_UINT32(mac_reg[RADV], E1000State),
1485        VMSTATE_UINT32(mac_reg[TADV], E1000State),
1486        VMSTATE_UINT32(mac_reg[ITR], E1000State),
1487        VMSTATE_BOOL(mit_irq_level, E1000State),
1488        VMSTATE_END_OF_LIST()
1489    }
1490};
1491
1492static const VMStateDescription vmstate_e1000_full_mac_state = {
1493    .name = "e1000/full_mac_state",
1494    .version_id = 1,
1495    .minimum_version_id = 1,
1496    .needed = e1000_full_mac_needed,
1497    .fields = (VMStateField[]) {
1498        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
1499        VMSTATE_END_OF_LIST()
1500    }
1501};
1502
1503static const VMStateDescription vmstate_e1000_tx_tso_state = {
1504    .name = "e1000/tx_tso_state",
1505    .version_id = 1,
1506    .minimum_version_id = 1,
1507    .needed = e1000_tso_state_needed,
1508    .post_load = e1000_tx_tso_post_load,
1509    .fields = (VMStateField[]) {
1510        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
1511        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
1512        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
1513        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
1514        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
1515        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
1516        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
1517        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
1518        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
1519        VMSTATE_INT8(tx.tso_props.ip, E1000State),
1520        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
1521        VMSTATE_END_OF_LIST()
1522    }
1523};
1524
1525static const VMStateDescription vmstate_e1000 = {
1526    .name = "e1000",
1527    .version_id = 2,
1528    .minimum_version_id = 1,
1529    .pre_save = e1000_pre_save,
1530    .post_load = e1000_post_load,
1531    .fields = (VMStateField[]) {
1532        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1533        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1534        VMSTATE_UNUSED(4), /* Was mmio_base.  */
1535        VMSTATE_UINT32(rxbuf_size, E1000State),
1536        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1537        VMSTATE_UINT32(eecd_state.val_in, E1000State),
1538        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1539        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1540        VMSTATE_UINT16(eecd_state.reading, E1000State),
1541        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1542        VMSTATE_UINT8(mig_props.ipcss, E1000State),
1543        VMSTATE_UINT8(mig_props.ipcso, E1000State),
1544        VMSTATE_UINT16(mig_props.ipcse, E1000State),
1545        VMSTATE_UINT8(mig_props.tucss, E1000State),
1546        VMSTATE_UINT8(mig_props.tucso, E1000State),
1547        VMSTATE_UINT16(mig_props.tucse, E1000State),
1548        VMSTATE_UINT32(mig_props.paylen, E1000State),
1549        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
1550        VMSTATE_UINT16(mig_props.mss, E1000State),
1551        VMSTATE_UINT16(tx.size, E1000State),
1552        VMSTATE_UINT16(tx.tso_frames, E1000State),
1553        VMSTATE_UINT8(tx.sum_needed, E1000State),
1554        VMSTATE_INT8(mig_props.ip, E1000State),
1555        VMSTATE_INT8(mig_props.tcp, E1000State),
1556        VMSTATE_BUFFER(tx.header, E1000State),
1557        VMSTATE_BUFFER(tx.data, E1000State),
1558        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1559        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1560        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1561        VMSTATE_UINT32(mac_reg[EECD], E1000State),
1562        VMSTATE_UINT32(mac_reg[EERD], E1000State),
1563        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1564        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1565        VMSTATE_UINT32(mac_reg[ICR], E1000State),
1566        VMSTATE_UINT32(mac_reg[ICS], E1000State),
1567        VMSTATE_UINT32(mac_reg[IMC], E1000State),
1568        VMSTATE_UINT32(mac_reg[IMS], E1000State),
1569        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1570        VMSTATE_UINT32(mac_reg[MANC], E1000State),
1571        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1572        VMSTATE_UINT32(mac_reg[MPC], E1000State),
1573        VMSTATE_UINT32(mac_reg[PBA], E1000State),
1574        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1575        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1576        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1577        VMSTATE_UINT32(mac_reg[RDH], E1000State),
1578        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1579        VMSTATE_UINT32(mac_reg[RDT], E1000State),
1580        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1581        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1582        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1583        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1584        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1585        VMSTATE_UINT32(mac_reg[TDH], E1000State),
1586        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1587        VMSTATE_UINT32(mac_reg[TDT], E1000State),
1588        VMSTATE_UINT32(mac_reg[TORH], E1000State),
1589        VMSTATE_UINT32(mac_reg[TORL], E1000State),
1590        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1591        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1592        VMSTATE_UINT32(mac_reg[TPR], E1000State),
1593        VMSTATE_UINT32(mac_reg[TPT], E1000State),
1594        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1595        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1596        VMSTATE_UINT32(mac_reg[VET], E1000State),
1597        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1598        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1599        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1600        VMSTATE_END_OF_LIST()
1601    },
1602    .subsections = (const VMStateDescription*[]) {
1603        &vmstate_e1000_mit_state,
1604        &vmstate_e1000_full_mac_state,
1605        &vmstate_e1000_tx_tso_state,
1606        NULL
1607    }
1608};
1609
1610/*
1611 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1612 * Note: The DevId, MAC address and checksum words are filled in by
 * e1000x_core_prepare_eeprom() during pci_e1000_realize().
1613 */
1614static const uint16_t e1000_eeprom_template[64] = {
1615    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1616    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1617    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1618    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1619    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1620    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1621    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1622    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1623};
1624
1625/* PCI interface */
1626
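/*
 * Set up the MMIO and I/O regions.  MMIO writes are coalesced for
 * everything except the registers listed in excluded_regs, whose
 * writes have immediate side effects (interrupt handling, MDIO access,
 * transmit kick-off) and therefore must reach the device right away.
 */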
1627static void
1628e1000_mmio_setup(E1000State *d)
1629{
1630    int i;
1631    const uint32_t excluded_regs[] = {
1632        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1633        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1634    };
1635
1636    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1637                          "e1000-mmio", PNPMMIO_SIZE);
1638    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1639    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++) {
1640        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1641                                     excluded_regs[i + 1] - excluded_regs[i] - 4);
    }
1642    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d,
                          "e1000-io", IOPORT_SIZE);
1643}
1644
1645static void
1646pci_e1000_uninit(PCIDevice *dev)
1647{
1648    E1000State *d = E1000(dev);
1649
1650    timer_del(d->autoneg_timer);
1651    timer_free(d->autoneg_timer);
1652    timer_del(d->mit_timer);
1653    timer_free(d->mit_timer);
1654    timer_del(d->flush_queue_timer);
1655    timer_free(d->flush_queue_timer);
1656    qemu_del_nic(d->nic);
1657}
1658
1659static NetClientInfo net_e1000_info = {
1660    .type = NET_CLIENT_DRIVER_NIC,
1661    .size = sizeof(NICState),
1662    .can_receive = e1000_can_receive,
1663    .receive = e1000_receive,
1664    .receive_iov = e1000_receive_iov,
1665    .link_status_changed = e1000_set_link_status,
1666};
1667
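/*
 * PCI config write hook: when a write enables bus mastering, retry
 * delivery of any packets the network backend has queued, since the
 * device can now DMA them into guest memory.
 */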
1668static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1669                                uint32_t val, int len)
1670{
1671    E1000State *s = E1000(pci_dev);
1672
1673    pci_default_write_config(pci_dev, address, val, len);
1674
1675    if (range_covers_byte(address, len, PCI_COMMAND) &&
1676        (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1677        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1678    }
1679}
1680
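/*
 * Realize: set up PCI config space defaults, the MMIO and I/O BARs,
 * the EEPROM contents (including MAC address and device ID), the NIC
 * backend, and the timers used for link auto-negotiation, interrupt
 * mitigation and receive-queue flushing.
 */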
1681static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1682{
1683    DeviceState *dev = DEVICE(pci_dev);
1684    E1000State *d = E1000(pci_dev);
1685    uint8_t *pci_conf;
1686    uint8_t *macaddr;
1687
1688    pci_dev->config_write = e1000_write_config;
1689
1690    pci_conf = pci_dev->config;
1691
1692    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1693    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1694
1695    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1696
1697    e1000_mmio_setup(d);
1698
1699    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1700
1701    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1702
1703    qemu_macaddr_default_if_unset(&d->conf.macaddr);
1704    macaddr = d->conf.macaddr.a;
1705
1706    e1000x_core_prepare_eeprom(d->eeprom_data,
1707                               e1000_eeprom_template,
1708                               sizeof(e1000_eeprom_template),
1709                               PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1710                               macaddr);
1711
1712    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1713                          object_get_typename(OBJECT(d)), dev->id, d);
1714
1715    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1716
1717    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1718    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1719    d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1720                                        e1000_flush_queue_timer, d);
1721}
1722
1723static void qdev_e1000_reset(DeviceState *dev)
1724{
1725    E1000State *d = E1000(dev);
1726    e1000_reset(d);
1727}
1728
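/*
 * The compat-flag properties default to on; machine-type compat
 * settings can clear them to keep the migration stream and
 * guest-visible behaviour compatible with earlier QEMU releases.
 */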
1729static Property e1000_properties[] = {
1730    DEFINE_NIC_PROPERTIES(E1000State, conf),
1731    DEFINE_PROP_BIT("autonegotiation", E1000State,
1732                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1733    DEFINE_PROP_BIT("mitigation", E1000State,
1734                    compat_flags, E1000_FLAG_MIT_BIT, true),
1735    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1736                    compat_flags, E1000_FLAG_MAC_BIT, true),
1737    DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1738                    compat_flags, E1000_FLAG_TSO_BIT, true),
1739    DEFINE_PROP_END_OF_LIST(),
1740};
1741
1742typedef struct E1000Info {
1743    const char *name;
1744    uint16_t   device_id;
1745    uint8_t    revision;
1746    uint16_t   phy_id2;
1747} E1000Info;
1748
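/*
 * Common class_init for all e1000 variants; the per-variant E1000Info
 * passed as class data supplies the PCI device ID, revision and PHY
 * ID2.
 */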
1749static void e1000_class_init(ObjectClass *klass, void *data)
1750{
1751    DeviceClass *dc = DEVICE_CLASS(klass);
1752    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1753    E1000BaseClass *e = E1000_CLASS(klass);
1754    const E1000Info *info = data;
1755
1756    k->realize = pci_e1000_realize;
1757    k->exit = pci_e1000_uninit;
1758    k->romfile = "efi-e1000.rom";
1759    k->vendor_id = PCI_VENDOR_ID_INTEL;
1760    k->device_id = info->device_id;
1761    k->revision = info->revision;
1762    e->phy_id2 = info->phy_id2;
1763    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1764    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1765    dc->desc = "Intel Gigabit Ethernet";
1766    dc->reset = qdev_e1000_reset;
1767    dc->vmsd = &vmstate_e1000;
1768    device_class_set_props(dc, e1000_properties);
1769}
1770
1771static void e1000_instance_init(Object *obj)
1772{
1773    E1000State *n = E1000(obj);
1774    device_add_bootindex_property(obj, &n->conf.bootindex,
1775                                  "bootindex", "/ethernet-phy@0",
1776                                  DEVICE(n));
1777}
1778
1779static const TypeInfo e1000_base_info = {
1780    .name          = TYPE_E1000_BASE,
1781    .parent        = TYPE_PCI_DEVICE,
1782    .instance_size = sizeof(E1000State),
1783    .instance_init = e1000_instance_init,
1784    .class_size    = sizeof(E1000BaseClass),
1785    .abstract      = true,
1786    .interfaces = (InterfaceInfo[]) {
1787        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1788        { },
1789    },
1790};
1791
1792static const E1000Info e1000_devices[] = {
1793    {
1794        .name      = "e1000",
1795        .device_id = E1000_DEV_ID_82540EM,
1796        .revision  = 0x03,
1797        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1798    },
1799    {
1800        .name      = "e1000-82544gc",
1801        .device_id = E1000_DEV_ID_82544GC_COPPER,
1802        .revision  = 0x03,
1803        .phy_id2   = E1000_PHY_ID2_82544x,
1804    },
1805    {
1806        .name      = "e1000-82545em",
1807        .device_id = E1000_DEV_ID_82545EM_COPPER,
1808        .revision  = 0x03,
1809        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1810    },
1811};
1812
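/*
 * Register the abstract base type, then one concrete QOM type per
 * entry in e1000_devices[]; the variants differ only in the PCI device
 * ID, revision and PHY ID2 reported to the guest.
 */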
1813static void e1000_register_types(void)
1814{
1815    int i;
1816
1817    type_register_static(&e1000_base_info);
1818    for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1819        const E1000Info *info = &e1000_devices[i];
1820        TypeInfo type_info = {};
1821
1822        type_info.name = info->name;
1823        type_info.parent = TYPE_E1000_BASE;
1824        type_info.class_data = (void *)info;
1825        type_info.class_init = e1000_class_init;
1826
1827        type_register(&type_info);
1828    }
1829}
1830
1831type_init(e1000_register_types)
1832