qemu/hw/net/e1000.c
<<
>>
Prefs
   1/*
   2 * QEMU e1000 emulation
   3 *
   4 * Software developer's manual:
   5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
   6 *
   7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
   8 * Copyright (c) 2008 Qumranet
   9 * Based on work done by:
  10 * Copyright (c) 2007 Dan Aloni
  11 * Copyright (c) 2004 Antony T Curtis
  12 *
  13 * This library is free software; you can redistribute it and/or
  14 * modify it under the terms of the GNU Lesser General Public
  15 * License as published by the Free Software Foundation; either
  16 * version 2 of the License, or (at your option) any later version.
  17 *
  18 * This library is distributed in the hope that it will be useful,
  19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 * Lesser General Public License for more details.
  22 *
  23 * You should have received a copy of the GNU Lesser General Public
  24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  25 */
  26
  27
  28#include "qemu/osdep.h"
  29#include "hw/hw.h"
  30#include "hw/pci/pci.h"
  31#include "net/net.h"
  32#include "net/checksum.h"
  33#include "hw/loader.h"
  34#include "sysemu/sysemu.h"
  35#include "sysemu/dma.h"
  36#include "qemu/iov.h"
  37#include "qemu/range.h"
  38
  39#include "e1000x_common.h"
  40
  41static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
  42
  43/* #define E1000_DEBUG */
  44
  45#ifdef E1000_DEBUG
  46enum {
  47    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
  48    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
  49    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
  50    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
  51};
  52#define DBGBIT(x)    (1<<DEBUG_##x)
  53static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
  54
  55#define DBGOUT(what, fmt, ...) do { \
  56    if (debugflags & DBGBIT(what)) \
  57        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
  58    } while (0)
  59#else
  60#define DBGOUT(what, fmt, ...) do {} while (0)
  61#endif
  62
  63#define IOPORT_SIZE       0x40
  64#define PNPMMIO_SIZE      0x20000
  65#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
  66
  67#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
  68
  69/*
  70 * HW models:
  71 *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
  72 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
  73 *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
  74 *  Others never tested
  75 */
  76
  77typedef struct E1000State_st {
  78    /*< private >*/
  79    PCIDevice parent_obj;
  80    /*< public >*/
  81
  82    NICState *nic;
  83    NICConf conf;
  84    MemoryRegion mmio;
  85    MemoryRegion io;
  86
  87    uint32_t mac_reg[0x8000];
  88    uint16_t phy_reg[0x20];
  89    uint16_t eeprom_data[64];
  90
  91    uint32_t rxbuf_size;
  92    uint32_t rxbuf_min_shift;
  93    struct e1000_tx {
  94        unsigned char header[256];
  95        unsigned char vlan_header[4];
  96        /* Fields vlan and data must not be reordered or separated. */
  97        unsigned char vlan[4];
  98        unsigned char data[0x10000];
  99        uint16_t size;
 100        unsigned char vlan_needed;
 101        e1000x_txd_props props;
 102        uint16_t tso_frames;
 103    } tx;
 104
 105    struct {
 106        uint32_t val_in;    /* shifted in from guest driver */
 107        uint16_t bitnum_in;
 108        uint16_t bitnum_out;
 109        uint16_t reading;
 110        uint32_t old_eecd;
 111    } eecd_state;
 112
 113    QEMUTimer *autoneg_timer;
 114
 115    QEMUTimer *mit_timer;      /* Mitigation timer. */
 116    bool mit_timer_on;         /* Mitigation timer is running. */
 117    bool mit_irq_level;        /* Tracks interrupt pin level. */
 118    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
 119
 120/* Compatibility flags for migration to/from qemu 1.3.0 and older */
 121#define E1000_FLAG_AUTONEG_BIT 0
 122#define E1000_FLAG_MIT_BIT 1
 123#define E1000_FLAG_MAC_BIT 2
 124#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
 125#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
 126#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
 127    uint32_t compat_flags;
 128} E1000State;
 129
 130#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
 131
 132typedef struct E1000BaseClass {
 133    PCIDeviceClass parent_class;
 134    uint16_t phy_id2;
 135} E1000BaseClass;
 136
 137#define TYPE_E1000_BASE "e1000-base"
 138
 139#define E1000(obj) \
 140    OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)
 141
 142#define E1000_DEVICE_CLASS(klass) \
 143     OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
 144#define E1000_DEVICE_GET_CLASS(obj) \
 145    OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
 146
 147static void
 148e1000_link_up(E1000State *s)
 149{
 150    e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
 151
 152    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 153    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 154}
 155
 156static void
 157e1000_autoneg_done(E1000State *s)
 158{
 159    e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
 160
 161    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 162    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 163}
 164
 165static bool
 166have_autoneg(E1000State *s)
 167{
 168    return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
 169}
 170
 171static void
 172set_phy_ctrl(E1000State *s, int index, uint16_t val)
 173{
 174    /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
 175    s->phy_reg[PHY_CTRL] = val & ~(0x3f |
 176                                   MII_CR_RESET |
 177                                   MII_CR_RESTART_AUTO_NEG);
 178
 179    /*
 180     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
 181     * migrate during auto negotiation, after migration the link will be
 182     * down.
 183     */
 184    if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
 185        e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
 186    }
 187}
 188
 189static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
 190    [PHY_CTRL] = set_phy_ctrl,
 191};
 192
 193enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
 194
 195enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
 196static const char phy_regcap[0x20] = {
 197    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
 198    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
 199    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
 200    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
 201    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
 202    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
 203    [PHY_AUTONEG_EXP] = PHY_R,
 204};
 205
 206/* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
 207static const uint16_t phy_reg_init[] = {
 208    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
 209                   MII_CR_FULL_DUPLEX |
 210                   MII_CR_AUTO_NEG_EN,
 211
 212    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
 213                   MII_SR_LINK_STATUS |   /* link initially up */
 214                   MII_SR_AUTONEG_CAPS |
 215                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
 216                   MII_SR_PREAMBLE_SUPPRESS |
 217                   MII_SR_EXTENDED_STATUS |
 218                   MII_SR_10T_HD_CAPS |
 219                   MII_SR_10T_FD_CAPS |
 220                   MII_SR_100X_HD_CAPS |
 221                   MII_SR_100X_FD_CAPS,
 222
 223    [PHY_ID1] = 0x141,
 224    /* [PHY_ID2] configured per DevId, from e1000_reset() */
 225    [PHY_AUTONEG_ADV] = 0xde1,
 226    [PHY_LP_ABILITY] = 0x1e0,
 227    [PHY_1000T_CTRL] = 0x0e00,
 228    [PHY_1000T_STATUS] = 0x3c00,
 229    [M88E1000_PHY_SPEC_CTRL] = 0x360,
 230    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
 231    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
 232};
 233
 234static const uint32_t mac_reg_init[] = {
 235    [PBA]     = 0x00100030,
 236    [LEDCTL]  = 0x602,
 237    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
 238                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
 239    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
 240                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
 241                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
 242                E1000_STATUS_LU,
 243    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
 244                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
 245                E1000_MANC_RMCP_EN,
 246};
 247
 248/* Helper function, *curr == 0 means the value is not set */
 249static inline void
 250mit_update_delay(uint32_t *curr, uint32_t value)
 251{
 252    if (value && (*curr == 0 || value < *curr)) {
 253        *curr = value;
 254    }
 255}
 256
 257static void
 258set_interrupt_cause(E1000State *s, int index, uint32_t val)
 259{
 260    PCIDevice *d = PCI_DEVICE(s);
 261    uint32_t pending_ints;
 262    uint32_t mit_delay;
 263
 264    s->mac_reg[ICR] = val;
 265
 266    /*
 267     * Make sure ICR and ICS registers have the same value.
 268     * The spec says that the ICS register is write-only.  However in practice,
 269     * on real hardware ICS is readable, and for reads it has the same value as
 270     * ICR (except that ICS does not have the clear on read behaviour of ICR).
 271     *
 272     * The VxWorks PRO/1000 driver uses this behaviour.
 273     */
 274    s->mac_reg[ICS] = val;
 275
 276    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
 277    if (!s->mit_irq_level && pending_ints) {
 278        /*
 279         * Here we detect a potential raising edge. We postpone raising the
 280         * interrupt line if we are inside the mitigation delay window
 281         * (s->mit_timer_on == 1).
 282         * We provide a partial implementation of interrupt mitigation,
 283         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
 284         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
 285         * RADV; relative timers based on TIDV and RDTR are not implemented.
 286         */
 287        if (s->mit_timer_on) {
 288            return;
 289        }
 290        if (chkflag(MIT)) {
 291            /* Compute the next mitigation delay according to pending
 292             * interrupts and the current values of RADV (provided
 293             * RDTR!=0), TADV and ITR.
 294             * Then rearm the timer.
 295             */
 296            mit_delay = 0;
 297            if (s->mit_ide &&
 298                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
 299                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
 300            }
 301            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
 302                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
 303            }
 304            mit_update_delay(&mit_delay, s->mac_reg[ITR]);
 305
 306            /*
 307             * According to e1000 SPEC, the Ethernet controller guarantees
 308             * a maximum observable interrupt rate of 7813 interrupts/sec.
 309             * Thus if mit_delay < 500 then the delay should be set to the
 310             * minimum delay possible which is 500.
 311             */
 312            mit_delay = (mit_delay < 500) ? 500 : mit_delay;
 313
 314            s->mit_timer_on = 1;
 315            timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 316                      mit_delay * 256);
 317            s->mit_ide = 0;
 318        }
 319    }
 320
 321    s->mit_irq_level = (pending_ints != 0);
 322    pci_set_irq(d, s->mit_irq_level);
 323}
 324
 325static void
 326e1000_mit_timer(void *opaque)
 327{
 328    E1000State *s = opaque;
 329
 330    s->mit_timer_on = 0;
 331    /* Call set_interrupt_cause to update the irq level (if necessary). */
 332    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
 333}
 334
 335static void
 336set_ics(E1000State *s, int index, uint32_t val)
 337{
 338    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
 339        s->mac_reg[IMS]);
 340    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
 341}
 342
 343static void
 344e1000_autoneg_timer(void *opaque)
 345{
 346    E1000State *s = opaque;
 347    if (!qemu_get_queue(s->nic)->link_down) {
 348        e1000_autoneg_done(s);
 349        set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
 350    }
 351}
 352
 353static void e1000_reset(void *opaque)
 354{
 355    E1000State *d = opaque;
 356    E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
 357    uint8_t *macaddr = d->conf.macaddr.a;
 358
 359    timer_del(d->autoneg_timer);
 360    timer_del(d->mit_timer);
 361    d->mit_timer_on = 0;
 362    d->mit_irq_level = 0;
 363    d->mit_ide = 0;
 364    memset(d->phy_reg, 0, sizeof d->phy_reg);
 365    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
 366    d->phy_reg[PHY_ID2] = edc->phy_id2;
 367    memset(d->mac_reg, 0, sizeof d->mac_reg);
 368    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
 369    d->rxbuf_min_shift = 1;
 370    memset(&d->tx, 0, sizeof d->tx);
 371
 372    if (qemu_get_queue(d->nic)->link_down) {
 373        e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
 374    }
 375
 376    e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
 377}
 378
 379static void
 380set_ctrl(E1000State *s, int index, uint32_t val)
 381{
 382    /* RST is self clearing */
 383    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
 384}
 385
 386static void
 387set_rx_control(E1000State *s, int index, uint32_t val)
 388{
 389    s->mac_reg[RCTL] = val;
 390    s->rxbuf_size = e1000x_rxbufsize(val);
 391    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
 392    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
 393           s->mac_reg[RCTL]);
 394    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 395}
 396
 397static void
 398set_mdic(E1000State *s, int index, uint32_t val)
 399{
 400    uint32_t data = val & E1000_MDIC_DATA_MASK;
 401    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
 402
 403    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
 404        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
 405    else if (val & E1000_MDIC_OP_READ) {
 406        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
 407        if (!(phy_regcap[addr] & PHY_R)) {
 408            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
 409            val |= E1000_MDIC_ERROR;
 410        } else
 411            val = (val ^ data) | s->phy_reg[addr];
 412    } else if (val & E1000_MDIC_OP_WRITE) {
 413        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
 414        if (!(phy_regcap[addr] & PHY_W)) {
 415            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
 416            val |= E1000_MDIC_ERROR;
 417        } else {
 418            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
 419                phyreg_writeops[addr](s, index, data);
 420            } else {
 421                s->phy_reg[addr] = data;
 422            }
 423        }
 424    }
 425    s->mac_reg[MDIC] = val | E1000_MDIC_READY;
 426
 427    if (val & E1000_MDIC_INT_EN) {
 428        set_ics(s, 0, E1000_ICR_MDAC);
 429    }
 430}
 431
 432static uint32_t
 433get_eecd(E1000State *s, int index)
 434{
 435    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
 436
 437    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
 438           s->eecd_state.bitnum_out, s->eecd_state.reading);
 439    if (!s->eecd_state.reading ||
 440        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
 441          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
 442        ret |= E1000_EECD_DO;
 443    return ret;
 444}
 445
 446static void
 447set_eecd(E1000State *s, int index, uint32_t val)
 448{
 449    uint32_t oldval = s->eecd_state.old_eecd;
 450
 451    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
 452            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
 453    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
 454        return;
 455    }
 456    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
 457        s->eecd_state.val_in = 0;
 458        s->eecd_state.bitnum_in = 0;
 459        s->eecd_state.bitnum_out = 0;
 460        s->eecd_state.reading = 0;
 461    }
 462    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
 463        return;
 464    }
 465    if (!(E1000_EECD_SK & val)) {               /* falling edge */
 466        s->eecd_state.bitnum_out++;
 467        return;
 468    }
 469    s->eecd_state.val_in <<= 1;
 470    if (val & E1000_EECD_DI)
 471        s->eecd_state.val_in |= 1;
 472    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
 473        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
 474        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
 475            EEPROM_READ_OPCODE_MICROWIRE);
 476    }
 477    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
 478           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
 479           s->eecd_state.reading);
 480}
 481
 482static uint32_t
 483flash_eerd_read(E1000State *s, int x)
 484{
 485    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
 486
 487    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
 488        return (s->mac_reg[EERD]);
 489
 490    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
 491        return (E1000_EEPROM_RW_REG_DONE | r);
 492
 493    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
 494           E1000_EEPROM_RW_REG_DONE | r);
 495}
 496
 497static void
 498putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
 499{
 500    uint32_t sum;
 501
 502    if (cse && cse < n)
 503        n = cse + 1;
 504    if (sloc < n-1) {
 505        sum = net_checksum_add(n-css, data+css);
 506        stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
 507    }
 508}
 509
 510static inline void
 511inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
 512{
 513    if (!memcmp(arr, bcast, sizeof bcast)) {
 514        e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
 515    } else if (arr[0] & 1) {
 516        e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
 517    }
 518}
 519
 520static void
 521e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
 522{
 523    static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
 524                                    PTC1023, PTC1522 };
 525
 526    NetClientState *nc = qemu_get_queue(s->nic);
 527    if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
 528        nc->info->receive(nc, buf, size);
 529    } else {
 530        qemu_send_packet(nc, buf, size);
 531    }
 532    inc_tx_bcast_or_mcast_count(s, buf);
 533    e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
 534}
 535
 536static void
 537xmit_seg(E1000State *s)
 538{
 539    uint16_t len;
 540    unsigned int frames = s->tx.tso_frames, css, sofar;
 541    struct e1000_tx *tp = &s->tx;
 542
 543    if (tp->props.tse && tp->props.cptse) {
 544        css = tp->props.ipcss;
 545        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
 546               frames, tp->size, css);
 547        if (tp->props.ip) {    /* IPv4 */
 548            stw_be_p(tp->data+css+2, tp->size - css);
 549            stw_be_p(tp->data+css+4,
 550                     lduw_be_p(tp->data + css + 4) + frames);
 551        } else {         /* IPv6 */
 552            stw_be_p(tp->data+css+4, tp->size - css);
 553        }
 554        css = tp->props.tucss;
 555        len = tp->size - css;
 556        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->props.tcp, css, len);
 557        if (tp->props.tcp) {
 558            sofar = frames * tp->props.mss;
 559            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
 560            if (tp->props.paylen - sofar > tp->props.mss) {
 561                tp->data[css + 13] &= ~9;    /* PSH, FIN */
 562            } else if (frames) {
 563                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
 564            }
 565        } else    /* UDP */
 566            stw_be_p(tp->data+css+4, len);
 567        if (tp->props.sum_needed & E1000_TXD_POPTS_TXSM) {
 568            unsigned int phsum;
 569            // add pseudo-header length before checksum calculation
 570            void *sp = tp->data + tp->props.tucso;
 571
 572            phsum = lduw_be_p(sp) + len;
 573            phsum = (phsum >> 16) + (phsum & 0xffff);
 574            stw_be_p(sp, phsum);
 575        }
 576        tp->tso_frames++;
 577    }
 578
 579    if (tp->props.sum_needed & E1000_TXD_POPTS_TXSM) {
 580        putsum(tp->data, tp->size, tp->props.tucso,
 581               tp->props.tucss, tp->props.tucse);
 582    }
 583    if (tp->props.sum_needed & E1000_TXD_POPTS_IXSM) {
 584        putsum(tp->data, tp->size, tp->props.ipcso,
 585               tp->props.ipcss, tp->props.ipcse);
 586    }
 587    if (tp->vlan_needed) {
 588        memmove(tp->vlan, tp->data, 4);
 589        memmove(tp->data, tp->data + 4, 8);
 590        memcpy(tp->data + 8, tp->vlan_header, 4);
 591        e1000_send_packet(s, tp->vlan, tp->size + 4);
 592    } else {
 593        e1000_send_packet(s, tp->data, tp->size);
 594    }
 595
 596    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
 597    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
 598    s->mac_reg[GPTC] = s->mac_reg[TPT];
 599    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
 600    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
 601}
 602
 603static void
 604process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
 605{
 606    PCIDevice *d = PCI_DEVICE(s);
 607    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
 608    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
 609    unsigned int split_size = txd_lower & 0xffff, bytes, sz;
 610    unsigned int msh = 0xfffff;
 611    uint64_t addr;
 612    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
 613    struct e1000_tx *tp = &s->tx;
 614
 615    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
 616    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
 617        e1000x_read_tx_ctx_descr(xp, &tp->props);
 618        tp->tso_frames = 0;
 619        if (tp->props.tucso == 0) {    /* this is probably wrong */
 620            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
 621            tp->props.tucso = tp->props.tucss + (tp->props.tcp ? 16 : 6);
 622        }
 623        return;
 624    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
 625        // data descriptor
 626        if (tp->size == 0) {
 627            tp->props.sum_needed = le32_to_cpu(dp->upper.data) >> 8;
 628        }
 629        tp->props.cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
 630    } else {
 631        // legacy descriptor
 632        tp->props.cptse = 0;
 633    }
 634
 635    if (e1000x_vlan_enabled(s->mac_reg) &&
 636        e1000x_is_vlan_txd(txd_lower) &&
 637        (tp->props.cptse || txd_lower & E1000_TXD_CMD_EOP)) {
 638        tp->vlan_needed = 1;
 639        stw_be_p(tp->vlan_header,
 640                      le16_to_cpu(s->mac_reg[VET]));
 641        stw_be_p(tp->vlan_header + 2,
 642                      le16_to_cpu(dp->upper.fields.special));
 643    }
 644
 645    addr = le64_to_cpu(dp->buffer_addr);
 646    if (tp->props.tse && tp->props.cptse) {
 647        msh = tp->props.hdr_len + tp->props.mss;
 648        do {
 649            bytes = split_size;
 650            if (tp->size + bytes > msh)
 651                bytes = msh - tp->size;
 652
 653            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
 654            pci_dma_read(d, addr, tp->data + tp->size, bytes);
 655            sz = tp->size + bytes;
 656            if (sz >= tp->props.hdr_len && tp->size < tp->props.hdr_len) {
 657                memmove(tp->header, tp->data, tp->props.hdr_len);
 658            }
 659            tp->size = sz;
 660            addr += bytes;
 661            if (sz == msh) {
 662                xmit_seg(s);
 663                memmove(tp->data, tp->header, tp->props.hdr_len);
 664                tp->size = tp->props.hdr_len;
 665            }
 666            split_size -= bytes;
 667        } while (bytes && split_size);
 668    } else if (!tp->props.tse && tp->props.cptse) {
 669        // context descriptor TSE is not set, while data descriptor TSE is set
 670        DBGOUT(TXERR, "TCP segmentation error\n");
 671    } else {
 672        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
 673        pci_dma_read(d, addr, tp->data + tp->size, split_size);
 674        tp->size += split_size;
 675    }
 676
 677    if (!(txd_lower & E1000_TXD_CMD_EOP))
 678        return;
 679    if (!(tp->props.tse && tp->props.cptse && tp->size < tp->props.hdr_len)) {
 680        xmit_seg(s);
 681    }
 682    tp->tso_frames = 0;
 683    tp->props.sum_needed = 0;
 684    tp->vlan_needed = 0;
 685    tp->size = 0;
 686    tp->props.cptse = 0;
 687}
 688
 689static uint32_t
 690txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
 691{
 692    PCIDevice *d = PCI_DEVICE(s);
 693    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
 694
 695    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
 696        return 0;
 697    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
 698                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
 699    dp->upper.data = cpu_to_le32(txd_upper);
 700    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
 701                  &dp->upper, sizeof(dp->upper));
 702    return E1000_ICR_TXDW;
 703}
 704
 705static uint64_t tx_desc_base(E1000State *s)
 706{
 707    uint64_t bah = s->mac_reg[TDBAH];
 708    uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
 709
 710    return (bah << 32) + bal;
 711}
 712
 713static void
 714start_xmit(E1000State *s)
 715{
 716    PCIDevice *d = PCI_DEVICE(s);
 717    dma_addr_t base;
 718    struct e1000_tx_desc desc;
 719    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
 720
 721    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
 722        DBGOUT(TX, "tx disabled\n");
 723        return;
 724    }
 725
 726    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
 727        base = tx_desc_base(s) +
 728               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
 729        pci_dma_read(d, base, &desc, sizeof(desc));
 730
 731        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
 732               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
 733               desc.upper.data);
 734
 735        process_tx_desc(s, &desc);
 736        cause |= txdesc_writeback(s, base, &desc);
 737
 738        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
 739            s->mac_reg[TDH] = 0;
 740        /*
 741         * the following could happen only if guest sw assigns
 742         * bogus values to TDT/TDLEN.
 743         * there's nothing too intelligent we could do about this.
 744         */
 745        if (s->mac_reg[TDH] == tdh_start ||
 746            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
 747            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
 748                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
 749            break;
 750        }
 751    }
 752    set_ics(s, 0, cause);
 753}
 754
 755static int
 756receive_filter(E1000State *s, const uint8_t *buf, int size)
 757{
 758    uint32_t rctl = s->mac_reg[RCTL];
 759    int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
 760
 761    if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
 762        e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
 763        uint16_t vid = lduw_be_p(buf + 14);
 764        uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
 765                                 ((vid >> 5) & 0x7f));
 766        if ((vfta & (1 << (vid & 0x1f))) == 0)
 767            return 0;
 768    }
 769
 770    if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
 771        return 1;
 772    }
 773
 774    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
 775        e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
 776        return 1;
 777    }
 778
 779    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
 780        e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
 781        return 1;
 782    }
 783
 784    return e1000x_rx_group_filter(s->mac_reg, buf);
 785}
 786
 787static void
 788e1000_set_link_status(NetClientState *nc)
 789{
 790    E1000State *s = qemu_get_nic_opaque(nc);
 791    uint32_t old_status = s->mac_reg[STATUS];
 792
 793    if (nc->link_down) {
 794        e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
 795    } else {
 796        if (have_autoneg(s) &&
 797            !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
 798            e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
 799        } else {
 800            e1000_link_up(s);
 801        }
 802    }
 803
 804    if (s->mac_reg[STATUS] != old_status)
 805        set_ics(s, 0, E1000_ICR_LSC);
 806}
 807
 808static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
 809{
 810    int bufs;
 811    /* Fast-path short packets */
 812    if (total_size <= s->rxbuf_size) {
 813        return s->mac_reg[RDH] != s->mac_reg[RDT];
 814    }
 815    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
 816        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
 817    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
 818        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
 819            s->mac_reg[RDT] - s->mac_reg[RDH];
 820    } else {
 821        return false;
 822    }
 823    return total_size <= bufs * s->rxbuf_size;
 824}
 825
 826static int
 827e1000_can_receive(NetClientState *nc)
 828{
 829    E1000State *s = qemu_get_nic_opaque(nc);
 830
 831    return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
 832        e1000_has_rxbufs(s, 1);
 833}
 834
 835static uint64_t rx_desc_base(E1000State *s)
 836{
 837    uint64_t bah = s->mac_reg[RDBAH];
 838    uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
 839
 840    return (bah << 32) + bal;
 841}
 842
 843static ssize_t
 844e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
 845{
 846    E1000State *s = qemu_get_nic_opaque(nc);
 847    PCIDevice *d = PCI_DEVICE(s);
 848    struct e1000_rx_desc desc;
 849    dma_addr_t base;
 850    unsigned int n, rdt;
 851    uint32_t rdh_start;
 852    uint16_t vlan_special = 0;
 853    uint8_t vlan_status = 0;
 854    uint8_t min_buf[MIN_BUF_SIZE];
 855    struct iovec min_iov;
 856    uint8_t *filter_buf = iov->iov_base;
 857    size_t size = iov_size(iov, iovcnt);
 858    size_t iov_ofs = 0;
 859    size_t desc_offset;
 860    size_t desc_size;
 861    size_t total_size;
 862
 863    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
 864        return -1;
 865    }
 866
 867    /* Pad to minimum Ethernet frame length */
 868    if (size < sizeof(min_buf)) {
 869        iov_to_buf(iov, iovcnt, 0, min_buf, size);
 870        memset(&min_buf[size], 0, sizeof(min_buf) - size);
 871        e1000x_inc_reg_if_not_full(s->mac_reg, RUC);
 872        min_iov.iov_base = filter_buf = min_buf;
 873        min_iov.iov_len = size = sizeof(min_buf);
 874        iovcnt = 1;
 875        iov = &min_iov;
 876    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
 877        /* This is very unlikely, but may happen. */
 878        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
 879        filter_buf = min_buf;
 880    }
 881
 882    /* Discard oversized packets if !LPE and !SBP. */
 883    if (e1000x_is_oversized(s->mac_reg, size)) {
 884        return size;
 885    }
 886
 887    if (!receive_filter(s, filter_buf, size)) {
 888        return size;
 889    }
 890
 891    if (e1000x_vlan_enabled(s->mac_reg) &&
 892        e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
 893        vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
 894        iov_ofs = 4;
 895        if (filter_buf == iov->iov_base) {
 896            memmove(filter_buf + 4, filter_buf, 12);
 897        } else {
 898            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
 899            while (iov->iov_len <= iov_ofs) {
 900                iov_ofs -= iov->iov_len;
 901                iov++;
 902            }
 903        }
 904        vlan_status = E1000_RXD_STAT_VP;
 905        size -= 4;
 906    }
 907
 908    rdh_start = s->mac_reg[RDH];
 909    desc_offset = 0;
 910    total_size = size + e1000x_fcs_len(s->mac_reg);
 911    if (!e1000_has_rxbufs(s, total_size)) {
 912            set_ics(s, 0, E1000_ICS_RXO);
 913            return -1;
 914    }
 915    do {
 916        desc_size = total_size - desc_offset;
 917        if (desc_size > s->rxbuf_size) {
 918            desc_size = s->rxbuf_size;
 919        }
 920        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
 921        pci_dma_read(d, base, &desc, sizeof(desc));
 922        desc.special = vlan_special;
 923        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
 924        if (desc.buffer_addr) {
 925            if (desc_offset < size) {
 926                size_t iov_copy;
 927                hwaddr ba = le64_to_cpu(desc.buffer_addr);
 928                size_t copy_size = size - desc_offset;
 929                if (copy_size > s->rxbuf_size) {
 930                    copy_size = s->rxbuf_size;
 931                }
 932                do {
 933                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
 934                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
 935                    copy_size -= iov_copy;
 936                    ba += iov_copy;
 937                    iov_ofs += iov_copy;
 938                    if (iov_ofs == iov->iov_len) {
 939                        iov++;
 940                        iov_ofs = 0;
 941                    }
 942                } while (copy_size);
 943            }
 944            desc_offset += desc_size;
 945            desc.length = cpu_to_le16(desc_size);
 946            if (desc_offset >= total_size) {
 947                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
 948            } else {
 949                /* Guest zeroing out status is not a hardware requirement.
 950                   Clear EOP in case guest didn't do it. */
 951                desc.status &= ~E1000_RXD_STAT_EOP;
 952            }
 953        } else { // as per intel docs; skip descriptors with null buf addr
 954            DBGOUT(RX, "Null RX descriptor!!\n");
 955        }
 956        pci_dma_write(d, base, &desc, sizeof(desc));
 957
 958        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
 959            s->mac_reg[RDH] = 0;
 960        /* see comment in start_xmit; same here */
 961        if (s->mac_reg[RDH] == rdh_start ||
 962            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
 963            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
 964                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
 965            set_ics(s, 0, E1000_ICS_RXO);
 966            return -1;
 967        }
 968    } while (desc_offset < total_size);
 969
 970    e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
 971
 972    n = E1000_ICS_RXT0;
 973    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
 974        rdt += s->mac_reg[RDLEN] / sizeof(desc);
 975    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
 976        s->rxbuf_min_shift)
 977        n |= E1000_ICS_RXDMT0;
 978
 979    set_ics(s, 0, n);
 980
 981    return size;
 982}
 983
 984static ssize_t
 985e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 986{
 987    const struct iovec iov = {
 988        .iov_base = (uint8_t *)buf,
 989        .iov_len = size
 990    };
 991
 992    return e1000_receive_iov(nc, &iov, 1);
 993}
 994
 995static uint32_t
 996mac_readreg(E1000State *s, int index)
 997{
 998    return s->mac_reg[index];
 999}
1000
1001static uint32_t
1002mac_low4_read(E1000State *s, int index)
1003{
1004    return s->mac_reg[index] & 0xf;
1005}
1006
1007static uint32_t
1008mac_low11_read(E1000State *s, int index)
1009{
1010    return s->mac_reg[index] & 0x7ff;
1011}
1012
1013static uint32_t
1014mac_low13_read(E1000State *s, int index)
1015{
1016    return s->mac_reg[index] & 0x1fff;
1017}
1018
1019static uint32_t
1020mac_low16_read(E1000State *s, int index)
1021{
1022    return s->mac_reg[index] & 0xffff;
1023}
1024
1025static uint32_t
1026mac_icr_read(E1000State *s, int index)
1027{
1028    uint32_t ret = s->mac_reg[ICR];
1029
1030    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1031    set_interrupt_cause(s, 0, 0);
1032    return ret;
1033}
1034
1035static uint32_t
1036mac_read_clr4(E1000State *s, int index)
1037{
1038    uint32_t ret = s->mac_reg[index];
1039
1040    s->mac_reg[index] = 0;
1041    return ret;
1042}
1043
1044static uint32_t
1045mac_read_clr8(E1000State *s, int index)
1046{
1047    uint32_t ret = s->mac_reg[index];
1048
1049    s->mac_reg[index] = 0;
1050    s->mac_reg[index-1] = 0;
1051    return ret;
1052}
1053
1054static void
1055mac_writereg(E1000State *s, int index, uint32_t val)
1056{
1057    uint32_t macaddr[2];
1058
1059    s->mac_reg[index] = val;
1060
1061    if (index == RA + 1) {
1062        macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1063        macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1064        qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1065    }
1066}
1067
1068static void
1069set_rdt(E1000State *s, int index, uint32_t val)
1070{
1071    s->mac_reg[index] = val & 0xffff;
1072    if (e1000_has_rxbufs(s, 1)) {
1073        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1074    }
1075}
1076
1077static void
1078set_16bit(E1000State *s, int index, uint32_t val)
1079{
1080    s->mac_reg[index] = val & 0xffff;
1081}
1082
1083static void
1084set_dlen(E1000State *s, int index, uint32_t val)
1085{
1086    s->mac_reg[index] = val & 0xfff80;
1087}
1088
1089static void
1090set_tctl(E1000State *s, int index, uint32_t val)
1091{
1092    s->mac_reg[index] = val;
1093    s->mac_reg[TDT] &= 0xffff;
1094    start_xmit(s);
1095}
1096
1097static void
1098set_icr(E1000State *s, int index, uint32_t val)
1099{
1100    DBGOUT(INTERRUPT, "set_icr %x\n", val);
1101    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1102}
1103
1104static void
1105set_imc(E1000State *s, int index, uint32_t val)
1106{
1107    s->mac_reg[IMS] &= ~val;
1108    set_ics(s, 0, 0);
1109}
1110
1111static void
1112set_ims(E1000State *s, int index, uint32_t val)
1113{
1114    s->mac_reg[IMS] |= val;
1115    set_ics(s, 0, 0);
1116}
1117
1118#define getreg(x)    [x] = mac_readreg
1119static uint32_t (*macreg_readops[])(E1000State *, int) = {
1120    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1121    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1122    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1123    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1124    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1125    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1126    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1127    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1128    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1129    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1130    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
1131    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1132    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1133    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
1134    getreg(GOTCL),
1135
1136    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1137    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
1138    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
1139    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
1140    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
1141    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
1142    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
1143    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
1144    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1145    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1146    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
1147    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
1148    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
1149    [MPTC]    = mac_read_clr4,
1150    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1151    [EERD]    = flash_eerd_read,
1152    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
1153    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
1154    [RDFPC]   = mac_low13_read,
1155    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
1156    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
1157    [TDFPC]   = mac_low13_read,
1158    [AIT]     = mac_low16_read,
1159
1160    [CRCERRS ... MPC]   = &mac_readreg,
1161    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
1162    [FFLT ... FFLT+6]   = &mac_low11_read,
1163    [RA ... RA+31]      = &mac_readreg,
1164    [WUPM ... WUPM+31]  = &mac_readreg,
1165    [MTA ... MTA+127]   = &mac_readreg,
1166    [VFTA ... VFTA+127] = &mac_readreg,
1167    [FFMT ... FFMT+254] = &mac_low4_read,
1168    [FFVT ... FFVT+254] = &mac_readreg,
1169    [PBM ... PBM+16383] = &mac_readreg,
1170};
1171enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1172
1173#define putreg(x)    [x] = mac_writereg
1174static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1175    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1176    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1177    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1178    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
1179    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
1180    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
1181    putreg(WUS),      putreg(AIT),
1182
1183    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
1184    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
1185    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
1186    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
1187    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
1188    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
1189    [ITR]    = set_16bit,
1190
1191    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
1192    [FFLT ... FFLT+6]   = &mac_writereg,
1193    [RA ... RA+31]      = &mac_writereg,
1194    [WUPM ... WUPM+31]  = &mac_writereg,
1195    [MTA ... MTA+127]   = &mac_writereg,
1196    [VFTA ... VFTA+127] = &mac_writereg,
1197    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
1198    [PBM ... PBM+16383] = &mac_writereg,
1199};
1200
1201enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1202
1203enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1204
1205#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
1206/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1207 * f - flag bits (up to 6 possible flags)
1208 * n - flag needed
1209 * p - partially implenented */
1210static const uint8_t mac_reg_access[0x8000] = {
1211    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
1212    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),
1213
1214    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
1215    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
1216    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
1217    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
1218    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
1219    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
1220    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
1221    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
1222    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
1223    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
1224    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
1225    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
1226    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
1227    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
1228    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
1229    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
1230    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
1231    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
1232    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
1233    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
1234    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
1235    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
1236    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
1237    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
1238    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
1239    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
1240    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
1241    [BPTC]    = markflag(MAC),
1242
1243    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1244    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1245    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1246    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1247    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1248    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1249    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1250    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1251    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1252    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1253    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
1254};
1255
1256static void
1257e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1258                 unsigned size)
1259{
1260    E1000State *s = opaque;
1261    unsigned int index = (addr & 0x1ffff) >> 2;
1262
1263    if (index < NWRITEOPS && macreg_writeops[index]) {
1264        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1265            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1266            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1267                DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1268                       "It is not fully implemented.\n", index<<2);
1269            }
1270            macreg_writeops[index](s, index, val);
1271        } else {    /* "flag needed" bit is set, but the flag is not active */
1272            DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1273                   index<<2);
1274        }
1275    } else if (index < NREADOPS && macreg_readops[index]) {
1276        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1277               index<<2, val);
1278    } else {
1279        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1280               index<<2, val);
1281    }
1282}
1283
1284static uint64_t
1285e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1286{
1287    E1000State *s = opaque;
1288    unsigned int index = (addr & 0x1ffff) >> 2;
1289
1290    if (index < NREADOPS && macreg_readops[index]) {
1291        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1292            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1293            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1294                DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1295                       "It is not fully implemented.\n", index<<2);
1296            }
1297            return macreg_readops[index](s, index);
1298        } else {    /* "flag needed" bit is set, but the flag is not active */
1299            DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1300                   index<<2);
1301        }
1302    } else {
1303        DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1304    }
1305    return 0;
1306}
1307
1308static const MemoryRegionOps e1000_mmio_ops = {
1309    .read = e1000_mmio_read,
1310    .write = e1000_mmio_write,
1311    .endianness = DEVICE_LITTLE_ENDIAN,
1312    .impl = {
1313        .min_access_size = 4,
1314        .max_access_size = 4,
1315    },
1316};
1317
1318static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1319                              unsigned size)
1320{
1321    E1000State *s = opaque;
1322
1323    (void)s;
1324    return 0;
1325}
1326
1327static void e1000_io_write(void *opaque, hwaddr addr,
1328                           uint64_t val, unsigned size)
1329{
1330    E1000State *s = opaque;
1331
1332    (void)s;
1333}
1334
1335static const MemoryRegionOps e1000_io_ops = {
1336    .read = e1000_io_read,
1337    .write = e1000_io_write,
1338    .endianness = DEVICE_LITTLE_ENDIAN,
1339};
1340
1341static bool is_version_1(void *opaque, int version_id)
1342{
1343    return version_id == 1;
1344}
1345
1346static int e1000_pre_save(void *opaque)
1347{
1348    E1000State *s = opaque;
1349    NetClientState *nc = qemu_get_queue(s->nic);
1350
1351    /* If the mitigation timer is active, emulate a timeout now. */
1352    if (s->mit_timer_on) {
1353        e1000_mit_timer(s);
1354    }
1355
1356    /*
1357     * If link is down and auto-negotiation is supported and ongoing,
1358     * complete auto-negotiation immediately. This allows us to look
1359     * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1360     */
1361    if (nc->link_down && have_autoneg(s)) {
1362        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1363    }
1364
1365    return 0;
1366}
1367
1368static int e1000_post_load(void *opaque, int version_id)
1369{
1370    E1000State *s = opaque;
1371    NetClientState *nc = qemu_get_queue(s->nic);
1372
1373    if (!chkflag(MIT)) {
1374        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1375            s->mac_reg[TADV] = 0;
1376        s->mit_irq_level = false;
1377    }
1378    s->mit_ide = 0;
1379    s->mit_timer_on = false;
1380
1381    /* nc.link_down can't be migrated, so infer link_down according
1382     * to link status bit in mac_reg[STATUS].
1383     * Alternatively, restart link negotiation if it was in progress. */
1384    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1385
1386    if (have_autoneg(s) &&
1387        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1388        nc->link_down = false;
1389        timer_mod(s->autoneg_timer,
1390                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1391    }
1392
1393    return 0;
1394}
1395
1396static bool e1000_mit_state_needed(void *opaque)
1397{
1398    E1000State *s = opaque;
1399
1400    return chkflag(MIT);
1401}
1402
1403static bool e1000_full_mac_needed(void *opaque)
1404{
1405    E1000State *s = opaque;
1406
1407    return chkflag(MAC);
1408}
1409
1410static const VMStateDescription vmstate_e1000_mit_state = {
1411    .name = "e1000/mit_state",
1412    .version_id = 1,
1413    .minimum_version_id = 1,
1414    .needed = e1000_mit_state_needed,
1415    .fields = (VMStateField[]) {
1416        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1417        VMSTATE_UINT32(mac_reg[RADV], E1000State),
1418        VMSTATE_UINT32(mac_reg[TADV], E1000State),
1419        VMSTATE_UINT32(mac_reg[ITR], E1000State),
1420        VMSTATE_BOOL(mit_irq_level, E1000State),
1421        VMSTATE_END_OF_LIST()
1422    }
1423};
1424
1425static const VMStateDescription vmstate_e1000_full_mac_state = {
1426    .name = "e1000/full_mac_state",
1427    .version_id = 1,
1428    .minimum_version_id = 1,
1429    .needed = e1000_full_mac_needed,
1430    .fields = (VMStateField[]) {
1431        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
1432        VMSTATE_END_OF_LIST()
1433    }
1434};
1435
1436static const VMStateDescription vmstate_e1000 = {
1437    .name = "e1000",
1438    .version_id = 2,
1439    .minimum_version_id = 1,
1440    .pre_save = e1000_pre_save,
1441    .post_load = e1000_post_load,
1442    .fields = (VMStateField[]) {
1443        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1444        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1445        VMSTATE_UNUSED(4), /* Was mmio_base.  */
1446        VMSTATE_UINT32(rxbuf_size, E1000State),
1447        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1448        VMSTATE_UINT32(eecd_state.val_in, E1000State),
1449        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1450        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1451        VMSTATE_UINT16(eecd_state.reading, E1000State),
1452        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1453        VMSTATE_UINT8(tx.props.ipcss, E1000State),
1454        VMSTATE_UINT8(tx.props.ipcso, E1000State),
1455        VMSTATE_UINT16(tx.props.ipcse, E1000State),
1456        VMSTATE_UINT8(tx.props.tucss, E1000State),
1457        VMSTATE_UINT8(tx.props.tucso, E1000State),
1458        VMSTATE_UINT16(tx.props.tucse, E1000State),
1459        VMSTATE_UINT32(tx.props.paylen, E1000State),
1460        VMSTATE_UINT8(tx.props.hdr_len, E1000State),
1461        VMSTATE_UINT16(tx.props.mss, E1000State),
1462        VMSTATE_UINT16(tx.size, E1000State),
1463        VMSTATE_UINT16(tx.tso_frames, E1000State),
1464        VMSTATE_UINT8(tx.props.sum_needed, E1000State),
1465        VMSTATE_INT8(tx.props.ip, E1000State),
1466        VMSTATE_INT8(tx.props.tcp, E1000State),
1467        VMSTATE_BUFFER(tx.header, E1000State),
1468        VMSTATE_BUFFER(tx.data, E1000State),
1469        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1470        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1471        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1472        VMSTATE_UINT32(mac_reg[EECD], E1000State),
1473        VMSTATE_UINT32(mac_reg[EERD], E1000State),
1474        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1475        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1476        VMSTATE_UINT32(mac_reg[ICR], E1000State),
1477        VMSTATE_UINT32(mac_reg[ICS], E1000State),
1478        VMSTATE_UINT32(mac_reg[IMC], E1000State),
1479        VMSTATE_UINT32(mac_reg[IMS], E1000State),
1480        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1481        VMSTATE_UINT32(mac_reg[MANC], E1000State),
1482        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1483        VMSTATE_UINT32(mac_reg[MPC], E1000State),
1484        VMSTATE_UINT32(mac_reg[PBA], E1000State),
1485        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1486        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1487        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1488        VMSTATE_UINT32(mac_reg[RDH], E1000State),
1489        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1490        VMSTATE_UINT32(mac_reg[RDT], E1000State),
1491        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1492        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1493        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1494        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1495        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1496        VMSTATE_UINT32(mac_reg[TDH], E1000State),
1497        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1498        VMSTATE_UINT32(mac_reg[TDT], E1000State),
1499        VMSTATE_UINT32(mac_reg[TORH], E1000State),
1500        VMSTATE_UINT32(mac_reg[TORL], E1000State),
1501        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1502        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1503        VMSTATE_UINT32(mac_reg[TPR], E1000State),
1504        VMSTATE_UINT32(mac_reg[TPT], E1000State),
1505        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1506        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1507        VMSTATE_UINT32(mac_reg[VET], E1000State),
1508        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1509        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1510        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1511        VMSTATE_END_OF_LIST()
1512    },
1513    .subsections = (const VMStateDescription*[]) {
1514        &vmstate_e1000_mit_state,
1515        &vmstate_e1000_full_mac_state,
1516        NULL
1517    }
1518};
1519
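/*
 * Illustrative sketch, not part of the device model: the subsections listed
 * above (vmstate_e1000_mit_state, vmstate_e1000_full_mac_state) are how
 * optional state gets migrated without breaking streams from older QEMU
 * versions.  A new piece of state would normally follow the same shape;
 * the predicate and field below are hypothetical placeholders:
 *
 *     static const VMStateDescription vmstate_e1000_example_state = {
 *         .name = "e1000/example_state",
 *         .version_id = 1,
 *         .minimum_version_id = 1,
 *         .needed = e1000_example_state_needed,        // hypothetical
 *         .fields = (VMStateField[]) {
 *             VMSTATE_UINT32(example_reg, E1000State), // hypothetical
 *             VMSTATE_END_OF_LIST()
 *         }
 *     };
 *
 * The subsection is only put on the wire when .needed returns true, so
 * older destinations that do not know about it can still load the stream.
 */
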
1520/*
1521 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1522 * Note: A valid DevId will be inserted during pci_e1000_realize().
1523 */
1524static const uint16_t e1000_eeprom_template[64] = {
1525    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1526    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1527    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1528    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1529    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1530    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1531    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1532    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1533};
1534
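/*
 * Sketch of what e1000x_core_prepare_eeprom() (hw/net/e1000x_common.c) does
 * with this template, roughly: it copies it, patches in the MAC address
 * (words 0-2) and the PCI device ID, then rewrites the checksum word so
 * that the first 64 words sum to EEPROM_SUM (0xBABA):
 *
 *     uint16_t sum = 0;
 *     int i;
 *     for (i = 0; i < EEPROM_CHECKSUM_REG; i++) {
 *         sum += eeprom_data[i];
 *     }
 *     eeprom_data[EEPROM_CHECKSUM_REG] = (uint16_t)(EEPROM_SUM - sum);
 *
 * Guest drivers verify this sum before trusting the EEPROM contents.
 */
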
1535/* PCI interface */
1536
1537static void
1538e1000_mmio_setup(E1000State *d)
1539{
1540    int i;
1541    const uint32_t excluded_regs[] = {
1542        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1543        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1544    };
1545
1546    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1547                          "e1000-mmio", PNPMMIO_SIZE);
1548    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1549    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1550        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1551                                     excluded_regs[i+1] - excluded_regs[i] - 4);
1552    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1553}
1554
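/*
 * Note on the layout above: writes to most of the register space are
 * coalesced, i.e. batched by the hypervisor (KVM coalesced MMIO) and only
 * flushed on the next exit to userspace, which saves VM exits on hot but
 * side-effect-free registers.  The registers named in excluded_regs need
 * immediate handling (PHY access, interrupt cause/mask, transmit kick),
 * so the loop punches a 4-byte hole at each of them.  With E1000_MDIC at
 * 0x20 and E1000_ICR at 0xc0, for example, the first two coalesced ranges
 * come out roughly as:
 *
 *     [0x00, 0x20)   everything before MDIC
 *     [0x24, 0xc0)   between MDIC and ICR
 *
 * and so on, with PNPMMIO_SIZE acting as the terminator of the list.
 */
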
1555static void
1556pci_e1000_uninit(PCIDevice *dev)
1557{
1558    E1000State *d = E1000(dev);
1559
1560    timer_del(d->autoneg_timer);
1561    timer_free(d->autoneg_timer);
1562    timer_del(d->mit_timer);
1563    timer_free(d->mit_timer);
1564    qemu_del_nic(d->nic);
1565}
1566
1567static NetClientInfo net_e1000_info = {
1568    .type = NET_CLIENT_DRIVER_NIC,
1569    .size = sizeof(NICState),
1570    .can_receive = e1000_can_receive,
1571    .receive = e1000_receive,
1572    .receive_iov = e1000_receive_iov,
1573    .link_status_changed = e1000_set_link_status,
1574};
1575
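/*
 * Rough sketch of how the callbacks above are driven (the real logic
 * lives in net/net.c and net/queue.c): when the backend has a frame, the
 * net core does approximately
 *
 *     NetClientState *nc = qemu_get_queue(d->nic);
 *     if (nc->info->can_receive(nc)) {
 *         nc->info->receive_iov(nc, iov, iovcnt);  // -> e1000_receive_iov()
 *     } else {
 *         // the frame stays queued until the device calls
 *         // qemu_flush_queued_packets(nc), e.g. from e1000_write_config()
 *     }
 *
 * so .can_receive acts as back-pressure while RX is disabled, the link is
 * down, or the RX ring has no free descriptors.
 */
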
1576static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1577                                uint32_t val, int len)
1578{
1579    E1000State *s = E1000(pci_dev);
1580
1581    pci_default_write_config(pci_dev, address, val, len);
1582
1583    if (range_covers_byte(address, len, PCI_COMMAND) &&
1584        (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1585        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1586    }
1587}
1588
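/*
 * Note: the check above only fires when the write actually touches the
 * PCI_COMMAND byte; range_covers_byte(addr, len, off) is simply
 * "addr <= off && off < addr + len".  Once bus mastering is on the device
 * may DMA again, so frames queued while DMA was disabled are flushed into
 * the RX ring.  A hypothetical 16-bit config write:
 *
 *     // guest writes 0x0107 to PCI_COMMAND (offset 0x04, len 2)
 *     // range_covers_byte(0x04, 2, PCI_COMMAND) -> true
 *     // PCI_COMMAND_MASTER (bit 2) is set       -> flush queued packets
 */
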
1589static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1590{
1591    DeviceState *dev = DEVICE(pci_dev);
1592    E1000State *d = E1000(pci_dev);
1593    uint8_t *pci_conf;
1594    uint8_t *macaddr;
1595
1596    pci_dev->config_write = e1000_write_config;
1597
1598    pci_conf = pci_dev->config;
1599
1600    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1601    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1602
1603    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1604
1605    e1000_mmio_setup(d);
1606
1607    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1608
1609    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1610
1611    qemu_macaddr_default_if_unset(&d->conf.macaddr);
1612    macaddr = d->conf.macaddr.a;
1613
1614    e1000x_core_prepare_eeprom(d->eeprom_data,
1615                               e1000_eeprom_template,
1616                               sizeof(e1000_eeprom_template),
1617                               PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1618                               macaddr);
1619
1620    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1621                          object_get_typename(OBJECT(d)), dev->id, d);
1622
1623    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1624
1625    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1626    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1627}
1628
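/*
 * Example usage (command line, not compiled code): once realized the
 * device appears to the guest as an Intel 8254x NIC.  A typical
 * invocation, with illustrative names, is:
 *
 *     qemu-system-x86_64 \
 *         -netdev user,id=net0 \
 *         -device e1000,netdev=net0,mac=52:54:00:12:34:56
 *
 * If mac= is omitted, qemu_macaddr_default_if_unset() above fills in a
 * default locally administered address.
 */
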
1629static void qdev_e1000_reset(DeviceState *dev)
1630{
1631    E1000State *d = E1000(dev);
1632    e1000_reset(d);
1633}
1634
1635static Property e1000_properties[] = {
1636    DEFINE_NIC_PROPERTIES(E1000State, conf),
1637    DEFINE_PROP_BIT("autonegotiation", E1000State,
1638                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1639    DEFINE_PROP_BIT("mitigation", E1000State,
1640                    compat_flags, E1000_FLAG_MIT_BIT, true),
1641    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1642                    compat_flags, E1000_FLAG_MAC_BIT, true),
1643    DEFINE_PROP_END_OF_LIST(),
1644};
1645
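/*
 * Note: the three flag bits above exist mostly for compatibility with
 * older machine types, and each can also be flipped per device on the
 * command line, e.g. (illustrative):
 *
 *     -device e1000,netdev=net0,mitigation=off,extra_mac_registers=off
 *
 * Roughly: "autonegotiation" off skips the emulated link autonegotiation
 * delay, "mitigation" off disables the interrupt-moderation timer, and
 * "extra_mac_registers" off hides the extended MAC register set and keeps
 * it out of the migration stream.
 */
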
1646typedef struct E1000Info {
1647    const char *name;
1648    uint16_t   device_id;
1649    uint8_t    revision;
1650    uint16_t   phy_id2;
1651} E1000Info;
1652
1653static void e1000_class_init(ObjectClass *klass, void *data)
1654{
1655    DeviceClass *dc = DEVICE_CLASS(klass);
1656    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1657    E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1658    const E1000Info *info = data;
1659
1660    k->realize = pci_e1000_realize;
1661    k->exit = pci_e1000_uninit;
1662    k->romfile = "efi-e1000.rom";
1663    k->vendor_id = PCI_VENDOR_ID_INTEL;
1664    k->device_id = info->device_id;
1665    k->revision = info->revision;
1666    e->phy_id2 = info->phy_id2;
1667    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1668    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1669    dc->desc = "Intel Gigabit Ethernet";
1670    dc->reset = qdev_e1000_reset;
1671    dc->vmsd = &vmstate_e1000;
1672    dc->props = e1000_properties;
1673}
1674
1675static void e1000_instance_init(Object *obj)
1676{
1677    E1000State *n = E1000(obj);
1678    device_add_bootindex_property(obj, &n->conf.bootindex,
1679                                  "bootindex", "/ethernet-phy@0",
1680                                  DEVICE(n), NULL);
1681}
1682
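/*
 * Note: the "bootindex" property registered above is what makes
 *
 *     -device e1000,netdev=net0,bootindex=1
 *
 * work: it adds a firmware boot-order entry for this NIC (its device path
 * plus the "/ethernet-phy@0" suffix), so the guest firmware can try
 * network boot from it in the requested order.
 */
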
1683static const TypeInfo e1000_base_info = {
1684    .name          = TYPE_E1000_BASE,
1685    .parent        = TYPE_PCI_DEVICE,
1686    .instance_size = sizeof(E1000State),
1687    .instance_init = e1000_instance_init,
1688    .class_size    = sizeof(E1000BaseClass),
1689    .abstract      = true,
1690    .interfaces = (InterfaceInfo[]) {
1691        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1692        { },
1693    },
1694};
1695
1696static const E1000Info e1000_devices[] = {
1697    {
1698        .name      = "e1000",
1699        .device_id = E1000_DEV_ID_82540EM,
1700        .revision  = 0x03,
1701        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1702    },
1703    {
1704        .name      = "e1000-82544gc",
1705        .device_id = E1000_DEV_ID_82544GC_COPPER,
1706        .revision  = 0x03,
1707        .phy_id2   = E1000_PHY_ID2_82544x,
1708    },
1709    {
1710        .name      = "e1000-82545em",
1711        .device_id = E1000_DEV_ID_82545EM_COPPER,
1712        .revision  = 0x03,
1713        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1714    },
1715};
1716
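/*
 * Note: each entry above is registered as its own QOM type, so any of the
 * three models can be selected by name, e.g. (illustrative):
 *
 *     -device e1000              the default 82540EM model
 *     -device e1000-82544gc
 *     -device e1000-82545em
 *
 * They differ only in PCI device ID, revision and PHY ID2; the data path
 * is shared.
 */
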
1717static void e1000_register_types(void)
1718{
1719    int i;
1720
1721    type_register_static(&e1000_base_info);
1722    for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1723        const E1000Info *info = &e1000_devices[i];
1724        TypeInfo type_info = {};
1725
1726        type_info.name = info->name;
1727        type_info.parent = TYPE_E1000_BASE;
1728        type_info.class_data = (void *)info;
1729        type_info.class_init = e1000_class_init;
1730        type_info.instance_init = e1000_instance_init;
1731
1732        type_register(&type_info);
1733    }
1734}
1735
1736type_init(e1000_register_types)
1737