qemu/hw/net/e1000.c
   1/*
   2 * QEMU e1000 emulation
   3 *
   4 * Software developer's manual:
   5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
   6 *
   7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
   8 * Copyright (c) 2008 Qumranet
   9 * Based on work done by:
  10 * Copyright (c) 2007 Dan Aloni
  11 * Copyright (c) 2004 Antony T Curtis
  12 *
  13 * This library is free software; you can redistribute it and/or
  14 * modify it under the terms of the GNU Lesser General Public
  15 * License as published by the Free Software Foundation; either
  16 * version 2.1 of the License, or (at your option) any later version.
  17 *
  18 * This library is distributed in the hope that it will be useful,
  19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 * Lesser General Public License for more details.
  22 *
  23 * You should have received a copy of the GNU Lesser General Public
  24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  25 */
  26
  27
  28#include "qemu/osdep.h"
  29#include "hw/pci/pci.h"
  30#include "hw/qdev-properties.h"
  31#include "migration/vmstate.h"
  32#include "net/net.h"
  33#include "net/checksum.h"
  34#include "sysemu/sysemu.h"
  35#include "sysemu/dma.h"
  36#include "qemu/iov.h"
  37#include "qemu/module.h"
  38#include "qemu/range.h"
  39
  40#include "e1000x_common.h"
  41#include "trace.h"
  42#include "qom/object.h"
  43
  44static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
  45
  46/* #define E1000_DEBUG */
  47
  48#ifdef E1000_DEBUG
  49enum {
  50    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
  51    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
  52    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
  53    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
  54};
  55#define DBGBIT(x)    (1<<DEBUG_##x)
  56static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
  57
  58#define DBGOUT(what, fmt, ...) do { \
  59    if (debugflags & DBGBIT(what)) \
  60        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
  61    } while (0)
  62#else
  63#define DBGOUT(what, fmt, ...) do {} while (0)
  64#endif
  65
  66#define IOPORT_SIZE       0x40
  67#define PNPMMIO_SIZE      0x20000
  68#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
  69
  70#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
  71
  72/*
  73 * HW models:
  74 *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
  75 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
  76 *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
  77 *  Others never tested
  78 */
  79
  80struct E1000State_st {
  81    /*< private >*/
  82    PCIDevice parent_obj;
  83    /*< public >*/
  84
  85    NICState *nic;
  86    NICConf conf;
  87    MemoryRegion mmio;
  88    MemoryRegion io;
  89
  90    uint32_t mac_reg[0x8000];
  91    uint16_t phy_reg[0x20];
  92    uint16_t eeprom_data[64];
  93
  94    uint32_t rxbuf_size;
  95    uint32_t rxbuf_min_shift;
  96    struct e1000_tx {
  97        unsigned char header[256];
  98        unsigned char vlan_header[4];
  99        /* Fields vlan and data must not be reordered or separated. */
 100        unsigned char vlan[4];
 101        unsigned char data[0x10000];
 102        uint16_t size;
 103        unsigned char vlan_needed;
 104        unsigned char sum_needed;
 105        bool cptse;
 106        e1000x_txd_props props;
 107        e1000x_txd_props tso_props;
 108        uint16_t tso_frames;
 109    } tx;
 110
 111    struct {
 112        uint32_t val_in;    /* shifted in from guest driver */
 113        uint16_t bitnum_in;
 114        uint16_t bitnum_out;
 115        uint16_t reading;
 116        uint32_t old_eecd;
 117    } eecd_state;
 118
 119    QEMUTimer *autoneg_timer;
 120
 121    QEMUTimer *mit_timer;      /* Mitigation timer. */
 122    bool mit_timer_on;         /* Mitigation timer is running. */
 123    bool mit_irq_level;        /* Tracks interrupt pin level. */
 124    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
 125
 126    QEMUTimer *flush_queue_timer;
 127
 128/* Compatibility flags for migration to/from qemu 1.3.0 and older */
 129#define E1000_FLAG_AUTONEG_BIT 0
 130#define E1000_FLAG_MIT_BIT 1
 131#define E1000_FLAG_MAC_BIT 2
 132#define E1000_FLAG_TSO_BIT 3
 133#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
 134#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
 135#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
 136#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
 137    uint32_t compat_flags;
 138    bool received_tx_tso;
 139    bool use_tso_for_migration;
 140    e1000x_txd_props mig_props;
 141};
 142typedef struct E1000State_st E1000State;
 143
 144#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
 145
 146struct E1000BaseClass {
 147    PCIDeviceClass parent_class;
 148    uint16_t phy_id2;
 149};
 150typedef struct E1000BaseClass E1000BaseClass;
 151
 152#define TYPE_E1000_BASE "e1000-base"
 153
 154DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
 155                     E1000, TYPE_E1000_BASE)
 156
 157
 158static void
 159e1000_link_up(E1000State *s)
 160{
 161    e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
 162
 163    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 164    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 165}
 166
 167static void
 168e1000_autoneg_done(E1000State *s)
 169{
 170    e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
 171
 172    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 173    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 174}
 175
 176static bool
 177have_autoneg(E1000State *s)
 178{
 179    return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
 180}
 181
 182static void
 183set_phy_ctrl(E1000State *s, int index, uint16_t val)
 184{
 185    /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
 186    s->phy_reg[PHY_CTRL] = val & ~(0x3f |
 187                                   MII_CR_RESET |
 188                                   MII_CR_RESTART_AUTO_NEG);
 189
 190    /*
 191     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
 192     * migrate during auto negotiation, after migration the link will be
 193     * down.
 194     */
 195    if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
 196        e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
 197    }
 198}
 199
 200static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
 201    [PHY_CTRL] = set_phy_ctrl,
 202};
 203
 204enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
 205
 206enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
 207static const char phy_regcap[0x20] = {
 208    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
 209    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
 210    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
 211    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
 212    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
 213    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
 214    [PHY_AUTONEG_EXP] = PHY_R,
 215};
 216
  217/* PHY_ID2 documented in 8254x_GBe_SDM.pdf, p. 250 */
 218static const uint16_t phy_reg_init[] = {
 219    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
 220                   MII_CR_FULL_DUPLEX |
 221                   MII_CR_AUTO_NEG_EN,
 222
 223    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
 224                   MII_SR_LINK_STATUS |   /* link initially up */
 225                   MII_SR_AUTONEG_CAPS |
 226                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
 227                   MII_SR_PREAMBLE_SUPPRESS |
 228                   MII_SR_EXTENDED_STATUS |
 229                   MII_SR_10T_HD_CAPS |
 230                   MII_SR_10T_FD_CAPS |
 231                   MII_SR_100X_HD_CAPS |
 232                   MII_SR_100X_FD_CAPS,
 233
 234    [PHY_ID1] = 0x141,
 235    /* [PHY_ID2] configured per DevId, from e1000_reset() */
 236    [PHY_AUTONEG_ADV] = 0xde1,
 237    [PHY_LP_ABILITY] = 0x1e0,
 238    [PHY_1000T_CTRL] = 0x0e00,
 239    [PHY_1000T_STATUS] = 0x3c00,
 240    [M88E1000_PHY_SPEC_CTRL] = 0x360,
 241    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
 242    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
 243};
 244
 245static const uint32_t mac_reg_init[] = {
 246    [PBA]     = 0x00100030,
 247    [LEDCTL]  = 0x602,
 248    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
 249                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
 250    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
 251                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
 252                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
 253                E1000_STATUS_LU,
 254    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
 255                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
 256                E1000_MANC_RMCP_EN,
 257};
 258
 259/* Helper function, *curr == 0 means the value is not set */
 260static inline void
 261mit_update_delay(uint32_t *curr, uint32_t value)
 262{
 263    if (value && (*curr == 0 || value < *curr)) {
 264        *curr = value;
 265    }
 266}
 267
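/*
 * Latch 'val' into ICR/ICS and update the level of the PCI interrupt
 * pin.  A rising edge is postponed while the interrupt mitigation
 * timer is running; otherwise, if mitigation is enabled, the timer is
 * armed from ITR/RADV/TADV to space out subsequent interrupts.
 */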
 268static void
 269set_interrupt_cause(E1000State *s, int index, uint32_t val)
 270{
 271    PCIDevice *d = PCI_DEVICE(s);
 272    uint32_t pending_ints;
 273    uint32_t mit_delay;
 274
 275    s->mac_reg[ICR] = val;
 276
 277    /*
 278     * Make sure ICR and ICS registers have the same value.
 279     * The spec says that the ICS register is write-only.  However in practice,
 280     * on real hardware ICS is readable, and for reads it has the same value as
 281     * ICR (except that ICS does not have the clear on read behaviour of ICR).
 282     *
 283     * The VxWorks PRO/1000 driver uses this behaviour.
 284     */
 285    s->mac_reg[ICS] = val;
 286
 287    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
 288    if (!s->mit_irq_level && pending_ints) {
 289        /*
  290         * Here we detect a potential rising edge. We postpone raising the
 291         * interrupt line if we are inside the mitigation delay window
 292         * (s->mit_timer_on == 1).
 293         * We provide a partial implementation of interrupt mitigation,
 294         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
 295         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
 296         * RADV; relative timers based on TIDV and RDTR are not implemented.
 297         */
 298        if (s->mit_timer_on) {
 299            return;
 300        }
 301        if (chkflag(MIT)) {
 302            /* Compute the next mitigation delay according to pending
 303             * interrupts and the current values of RADV (provided
 304             * RDTR!=0), TADV and ITR.
 305             * Then rearm the timer.
 306             */
 307            mit_delay = 0;
 308            if (s->mit_ide &&
 309                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
 310                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
 311            }
 312            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
 313                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
 314            }
 315            mit_update_delay(&mit_delay, s->mac_reg[ITR]);
 316
 317            /*
 318             * According to e1000 SPEC, the Ethernet controller guarantees
 319             * a maximum observable interrupt rate of 7813 interrupts/sec.
 320             * Thus if mit_delay < 500 then the delay should be set to the
 321             * minimum delay possible which is 500.
 322             */
 323            mit_delay = (mit_delay < 500) ? 500 : mit_delay;
 324
 325            s->mit_timer_on = 1;
 326            timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 327                      mit_delay * 256);
 328            s->mit_ide = 0;
 329        }
 330    }
 331
 332    s->mit_irq_level = (pending_ints != 0);
 333    pci_set_irq(d, s->mit_irq_level);
 334}
 335
 336static void
 337e1000_mit_timer(void *opaque)
 338{
 339    E1000State *s = opaque;
 340
 341    s->mit_timer_on = 0;
 342    /* Call set_interrupt_cause to update the irq level (if necessary). */
 343    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
 344}
 345
 346static void
 347set_ics(E1000State *s, int index, uint32_t val)
 348{
 349    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
 350        s->mac_reg[IMS]);
 351    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
 352}
 353
 354static void
 355e1000_autoneg_timer(void *opaque)
 356{
 357    E1000State *s = opaque;
 358    if (!qemu_get_queue(s->nic)->link_down) {
 359        e1000_autoneg_done(s);
 360        set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
 361    }
 362}
 363
 364static void e1000_reset(void *opaque)
 365{
 366    E1000State *d = opaque;
 367    E1000BaseClass *edc = E1000_GET_CLASS(d);
 368    uint8_t *macaddr = d->conf.macaddr.a;
 369
 370    timer_del(d->autoneg_timer);
 371    timer_del(d->mit_timer);
 372    timer_del(d->flush_queue_timer);
 373    d->mit_timer_on = 0;
 374    d->mit_irq_level = 0;
 375    d->mit_ide = 0;
 376    memset(d->phy_reg, 0, sizeof d->phy_reg);
 377    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
 378    d->phy_reg[PHY_ID2] = edc->phy_id2;
 379    memset(d->mac_reg, 0, sizeof d->mac_reg);
 380    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
 381    d->rxbuf_min_shift = 1;
 382    memset(&d->tx, 0, sizeof d->tx);
 383
 384    if (qemu_get_queue(d->nic)->link_down) {
 385        e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
 386    }
 387
 388    e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
 389}
 390
 391static void
 392set_ctrl(E1000State *s, int index, uint32_t val)
 393{
 394    /* RST is self clearing */
 395    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
 396}
 397
 398static void
 399e1000_flush_queue_timer(void *opaque)
 400{
 401    E1000State *s = opaque;
 402
 403    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 404}
 405
 406static void
 407set_rx_control(E1000State *s, int index, uint32_t val)
 408{
 409    s->mac_reg[RCTL] = val;
 410    s->rxbuf_size = e1000x_rxbufsize(val);
 411    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
 412    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
 413           s->mac_reg[RCTL]);
 414    timer_mod(s->flush_queue_timer,
 415              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
 416}
 417
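/*
 * MDIC write handler: decode the opcode, PHY address and register
 * number, consult phy_regcap[] for read/write permission, dispatch
 * writes through phyreg_writeops[], and report completion via
 * E1000_MDIC_READY (raising an MDAC interrupt if requested).
 */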
 418static void
 419set_mdic(E1000State *s, int index, uint32_t val)
 420{
 421    uint32_t data = val & E1000_MDIC_DATA_MASK;
 422    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
 423
 424    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
 425        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
 426    else if (val & E1000_MDIC_OP_READ) {
 427        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
 428        if (!(phy_regcap[addr] & PHY_R)) {
 429            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
 430            val |= E1000_MDIC_ERROR;
 431        } else
 432            val = (val ^ data) | s->phy_reg[addr];
 433    } else if (val & E1000_MDIC_OP_WRITE) {
 434        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
 435        if (!(phy_regcap[addr] & PHY_W)) {
 436            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
 437            val |= E1000_MDIC_ERROR;
 438        } else {
 439            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
 440                phyreg_writeops[addr](s, index, data);
 441            } else {
 442                s->phy_reg[addr] = data;
 443            }
 444        }
 445    }
 446    s->mac_reg[MDIC] = val | E1000_MDIC_READY;
 447
 448    if (val & E1000_MDIC_INT_EN) {
 449        set_ics(s, 0, E1000_ICR_MDAC);
 450    }
 451}
 452
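/*
 * get_eecd()/set_eecd() emulate the EEPROM's Microwire serial
 * interface: the guest bit-bangs SK/CS/DI through EECD writes and
 * samples DO through EECD reads.  A 9-bit shift-in (3-bit opcode plus
 * 6-bit word address) selects the word of eeprom_data[] that is then
 * shifted out bit by bit.
 */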
 453static uint32_t
 454get_eecd(E1000State *s, int index)
 455{
 456    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
 457
 458    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
 459           s->eecd_state.bitnum_out, s->eecd_state.reading);
 460    if (!s->eecd_state.reading ||
 461        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
 462          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
 463        ret |= E1000_EECD_DO;
 464    return ret;
 465}
 466
 467static void
 468set_eecd(E1000State *s, int index, uint32_t val)
 469{
 470    uint32_t oldval = s->eecd_state.old_eecd;
 471
 472    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
 473            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
 474    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
 475        return;
 476    }
  477    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rising edge; reset state */
 478        s->eecd_state.val_in = 0;
 479        s->eecd_state.bitnum_in = 0;
 480        s->eecd_state.bitnum_out = 0;
 481        s->eecd_state.reading = 0;
 482    }
 483    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
 484        return;
 485    }
 486    if (!(E1000_EECD_SK & val)) {               /* falling edge */
 487        s->eecd_state.bitnum_out++;
 488        return;
 489    }
 490    s->eecd_state.val_in <<= 1;
 491    if (val & E1000_EECD_DI)
 492        s->eecd_state.val_in |= 1;
 493    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
 494        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
 495        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
 496            EEPROM_READ_OPCODE_MICROWIRE);
 497    }
 498    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
 499           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
 500           s->eecd_state.reading);
 501}
 502
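/*
 * EERD read handler: once the guest has set the START bit, return the
 * addressed word of eeprom_data[] together with the DONE flag
 * (out-of-range addresses complete with DONE but no data).
 */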
 503static uint32_t
 504flash_eerd_read(E1000State *s, int x)
 505{
 506    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
 507
 508    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
 509        return (s->mac_reg[EERD]);
 510
 511    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
 512        return (E1000_EEPROM_RW_REG_DONE | r);
 513
 514    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
 515           E1000_EEPROM_RW_REG_DONE | r);
 516}
 517
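/*
 * Insert a 16-bit Internet checksum at offset 'sloc', computed over the
 * bytes from 'css' up to the end of the buffer (or 'cse', if set).
 */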
 518static void
 519putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
 520{
 521    uint32_t sum;
 522
 523    if (cse && cse < n)
 524        n = cse + 1;
 525    if (sloc < n-1) {
 526        sum = net_checksum_add(n-css, data+css);
 527        stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
 528    }
 529}
 530
 531static inline void
 532inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
 533{
 534    if (!memcmp(arr, bcast, sizeof bcast)) {
 535        e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
 536    } else if (arr[0] & 1) {
 537        e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
 538    }
 539}
 540
 541static void
 542e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
 543{
 544    static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
 545                                    PTC1023, PTC1522 };
 546
 547    NetClientState *nc = qemu_get_queue(s->nic);
 548    if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
 549        qemu_receive_packet(nc, buf, size);
 550    } else {
 551        qemu_send_packet(nc, buf, size);
 552    }
 553    inc_tx_bcast_or_mcast_count(s, buf);
 554    e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
 555}
 556
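/*
 * Transmit one segment of the current packet: for TSO, patch the IP
 * length/identification and TCP sequence number of this segment and
 * fold the segment length into the pseudo-header checksum seed, then
 * insert the requested checksums, splice in the 802.1Q tag after the
 * MAC addresses if needed, and pass the frame to e1000_send_packet().
 */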
 557static void
 558xmit_seg(E1000State *s)
 559{
 560    uint16_t len;
 561    unsigned int frames = s->tx.tso_frames, css, sofar;
 562    struct e1000_tx *tp = &s->tx;
 563    struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
 564
 565    if (tp->cptse) {
 566        css = props->ipcss;
 567        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
 568               frames, tp->size, css);
 569        if (props->ip) {    /* IPv4 */
 570            stw_be_p(tp->data+css+2, tp->size - css);
 571            stw_be_p(tp->data+css+4,
 572                     lduw_be_p(tp->data + css + 4) + frames);
 573        } else {         /* IPv6 */
 574            stw_be_p(tp->data+css+4, tp->size - css);
 575        }
 576        css = props->tucss;
 577        len = tp->size - css;
 578        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
 579        if (props->tcp) {
 580            sofar = frames * props->mss;
 581            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
 582            if (props->paylen - sofar > props->mss) {
 583                tp->data[css + 13] &= ~9;    /* PSH, FIN */
 584            } else if (frames) {
 585                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
 586            }
 587        } else {    /* UDP */
 588            stw_be_p(tp->data+css+4, len);
 589        }
 590        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
 591            unsigned int phsum;
 592            // add pseudo-header length before checksum calculation
 593            void *sp = tp->data + props->tucso;
 594
 595            phsum = lduw_be_p(sp) + len;
 596            phsum = (phsum >> 16) + (phsum & 0xffff);
 597            stw_be_p(sp, phsum);
 598        }
 599        tp->tso_frames++;
 600    }
 601
 602    if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
 603        putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
 604    }
 605    if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
 606        putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
 607    }
 608    if (tp->vlan_needed) {
 609        memmove(tp->vlan, tp->data, 4);
 610        memmove(tp->data, tp->data + 4, 8);
 611        memcpy(tp->data + 8, tp->vlan_header, 4);
 612        e1000_send_packet(s, tp->vlan, tp->size + 4);
 613    } else {
 614        e1000_send_packet(s, tp->data, tp->size);
 615    }
 616
 617    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
 618    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
 619    s->mac_reg[GPTC] = s->mac_reg[TPT];
 620    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
 621    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
 622}
 623
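/*
 * Process a single transmit descriptor: context descriptors update the
 * legacy or TSO offload parameters; data and legacy descriptors have
 * their payload DMAed into tp->data, with TSO payloads cut into
 * MSS-sized segments that are sent via xmit_seg(); the remainder is
 * flushed when the EOP descriptor is reached.
 */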
 624static void
 625process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
 626{
 627    PCIDevice *d = PCI_DEVICE(s);
 628    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
 629    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
 630    unsigned int split_size = txd_lower & 0xffff, bytes, sz;
 631    unsigned int msh = 0xfffff;
 632    uint64_t addr;
 633    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
 634    struct e1000_tx *tp = &s->tx;
 635
 636    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
 637    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
 638        if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
 639            e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
 640            s->use_tso_for_migration = 1;
 641            tp->tso_frames = 0;
 642        } else {
 643            e1000x_read_tx_ctx_descr(xp, &tp->props);
 644            s->use_tso_for_migration = 0;
 645        }
 646        return;
 647    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
 648        // data descriptor
 649        if (tp->size == 0) {
 650            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
 651        }
 652        tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
 653    } else {
 654        // legacy descriptor
 655        tp->cptse = 0;
 656    }
 657
 658    if (e1000x_vlan_enabled(s->mac_reg) &&
 659        e1000x_is_vlan_txd(txd_lower) &&
 660        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
 661        tp->vlan_needed = 1;
 662        stw_be_p(tp->vlan_header,
 663                      le16_to_cpu(s->mac_reg[VET]));
 664        stw_be_p(tp->vlan_header + 2,
 665                      le16_to_cpu(dp->upper.fields.special));
 666    }
 667
 668    addr = le64_to_cpu(dp->buffer_addr);
 669    if (tp->cptse) {
 670        msh = tp->tso_props.hdr_len + tp->tso_props.mss;
 671        do {
 672            bytes = split_size;
 673            if (tp->size >= msh) {
 674                goto eop;
 675            }
 676            if (tp->size + bytes > msh)
 677                bytes = msh - tp->size;
 678
 679            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
 680            pci_dma_read(d, addr, tp->data + tp->size, bytes);
 681            sz = tp->size + bytes;
 682            if (sz >= tp->tso_props.hdr_len
 683                && tp->size < tp->tso_props.hdr_len) {
 684                memmove(tp->header, tp->data, tp->tso_props.hdr_len);
 685            }
 686            tp->size = sz;
 687            addr += bytes;
 688            if (sz == msh) {
 689                xmit_seg(s);
 690                memmove(tp->data, tp->header, tp->tso_props.hdr_len);
 691                tp->size = tp->tso_props.hdr_len;
 692            }
 693            split_size -= bytes;
 694        } while (bytes && split_size);
 695    } else {
 696        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
 697        pci_dma_read(d, addr, tp->data + tp->size, split_size);
 698        tp->size += split_size;
 699    }
 700
 701eop:
 702    if (!(txd_lower & E1000_TXD_CMD_EOP))
 703        return;
 704    if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
 705        xmit_seg(s);
 706    }
 707    tp->tso_frames = 0;
 708    tp->sum_needed = 0;
 709    tp->vlan_needed = 0;
 710    tp->size = 0;
 711    tp->cptse = 0;
 712}
 713
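/*
 * If the descriptor requested status reporting (RS/RPS), write the DD
 * bit back to guest memory; returns E1000_ICR_TXDW to accumulate as an
 * interrupt cause, or 0.
 */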
 714static uint32_t
 715txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
 716{
 717    PCIDevice *d = PCI_DEVICE(s);
 718    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
 719
 720    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
 721        return 0;
 722    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
 723                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
 724    dp->upper.data = cpu_to_le32(txd_upper);
 725    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
 726                  &dp->upper, sizeof(dp->upper));
 727    return E1000_ICR_TXDW;
 728}
 729
 730static uint64_t tx_desc_base(E1000State *s)
 731{
 732    uint64_t bah = s->mac_reg[TDBAH];
 733    uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
 734
 735    return (bah << 32) + bal;
 736}
 737
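/*
 * Walk the transmit ring from TDH to TDT, processing and writing back
 * each descriptor, then raise the accumulated interrupt causes.  Bogus
 * guest-programmed TDT/TDLEN values terminate the loop early.
 */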
 738static void
 739start_xmit(E1000State *s)
 740{
 741    PCIDevice *d = PCI_DEVICE(s);
 742    dma_addr_t base;
 743    struct e1000_tx_desc desc;
 744    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
 745
 746    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
 747        DBGOUT(TX, "tx disabled\n");
 748        return;
 749    }
 750
 751    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
 752        base = tx_desc_base(s) +
 753               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
 754        pci_dma_read(d, base, &desc, sizeof(desc));
 755
 756        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
 757               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
 758               desc.upper.data);
 759
 760        process_tx_desc(s, &desc);
 761        cause |= txdesc_writeback(s, base, &desc);
 762
 763        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
 764            s->mac_reg[TDH] = 0;
 765        /*
  766         * The following could happen only if the guest assigns bogus
  767         * values to TDT/TDLEN; there's nothing too intelligent we
  768         * could do about this.
 769         */
 770        if (s->mac_reg[TDH] == tdh_start ||
 771            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
 772            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
 773                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
 774            break;
 775        }
 776    }
 777    set_ics(s, 0, cause);
 778}
 779
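/*
 * Return nonzero if the incoming frame passes the VLAN filter and the
 * promiscuous/broadcast/multicast/unicast address filters.
 */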
 780static int
 781receive_filter(E1000State *s, const uint8_t *buf, int size)
 782{
 783    uint32_t rctl = s->mac_reg[RCTL];
 784    int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
 785
 786    if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
 787        e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
 788        uint16_t vid = lduw_be_p(buf + 14);
 789        uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
 790                                 ((vid >> 5) & 0x7f));
 791        if ((vfta & (1 << (vid & 0x1f))) == 0)
 792            return 0;
 793    }
 794
 795    if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
 796        return 1;
 797    }
 798
 799    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
 800        e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
 801        return 1;
 802    }
 803
 804    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
 805        e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
 806        return 1;
 807    }
 808
 809    return e1000x_rx_group_filter(s->mac_reg, buf);
 810}
 811
 812static void
 813e1000_set_link_status(NetClientState *nc)
 814{
 815    E1000State *s = qemu_get_nic_opaque(nc);
 816    uint32_t old_status = s->mac_reg[STATUS];
 817
 818    if (nc->link_down) {
 819        e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
 820    } else {
 821        if (have_autoneg(s) &&
 822            !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
 823            e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
 824        } else {
 825            e1000_link_up(s);
 826        }
 827    }
 828
 829    if (s->mac_reg[STATUS] != old_status)
 830        set_ics(s, 0, E1000_ICR_LSC);
 831}
 832
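/*
 * Check whether enough receive descriptors are free between RDH and RDT
 * to hold 'total_size' bytes at the current receive buffer size.
 */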
 833static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
 834{
 835    int bufs;
 836    /* Fast-path short packets */
 837    if (total_size <= s->rxbuf_size) {
 838        return s->mac_reg[RDH] != s->mac_reg[RDT];
 839    }
 840    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
 841        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
 842    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
 843        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
 844            s->mac_reg[RDT] - s->mac_reg[RDH];
 845    } else {
 846        return false;
 847    }
 848    return total_size <= bufs * s->rxbuf_size;
 849}
 850
 851static bool
 852e1000_can_receive(NetClientState *nc)
 853{
 854    E1000State *s = qemu_get_nic_opaque(nc);
 855
 856    return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
 857        e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
 858}
 859
 860static uint64_t rx_desc_base(E1000State *s)
 861{
 862    uint64_t bah = s->mac_reg[RDBAH];
 863    uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
 864
 865    return (bah << 32) + bal;
 866}
 867
 868static void
 869e1000_receiver_overrun(E1000State *s, size_t size)
 870{
 871    trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
 872    e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
 873    e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
 874    set_ics(s, 0, E1000_ICS_RXO);
 875}
 876
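/*
 * Receive path: pad runt frames to the minimum Ethernet length, apply
 * the receive filters, strip the 802.1Q tag when VLAN stripping is
 * enabled, scatter the frame across one or more receive descriptors,
 * update the statistics and raise RXT0/RXDMT0 as appropriate.
 */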
 877static ssize_t
 878e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
 879{
 880    E1000State *s = qemu_get_nic_opaque(nc);
 881    PCIDevice *d = PCI_DEVICE(s);
 882    struct e1000_rx_desc desc;
 883    dma_addr_t base;
 884    unsigned int n, rdt;
 885    uint32_t rdh_start;
 886    uint16_t vlan_special = 0;
 887    uint8_t vlan_status = 0;
 888    uint8_t min_buf[MIN_BUF_SIZE];
 889    struct iovec min_iov;
 890    uint8_t *filter_buf = iov->iov_base;
 891    size_t size = iov_size(iov, iovcnt);
 892    size_t iov_ofs = 0;
 893    size_t desc_offset;
 894    size_t desc_size;
 895    size_t total_size;
 896
 897    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
 898        return -1;
 899    }
 900
 901    if (timer_pending(s->flush_queue_timer)) {
 902        return 0;
 903    }
 904
 905    /* Pad to minimum Ethernet frame length */
 906    if (size < sizeof(min_buf)) {
 907        iov_to_buf(iov, iovcnt, 0, min_buf, size);
 908        memset(&min_buf[size], 0, sizeof(min_buf) - size);
 909        min_iov.iov_base = filter_buf = min_buf;
 910        min_iov.iov_len = size = sizeof(min_buf);
 911        iovcnt = 1;
 912        iov = &min_iov;
 913    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
 914        /* This is very unlikely, but may happen. */
 915        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
 916        filter_buf = min_buf;
 917    }
 918
 919    /* Discard oversized packets if !LPE and !SBP. */
 920    if (e1000x_is_oversized(s->mac_reg, size)) {
 921        return size;
 922    }
 923
 924    if (!receive_filter(s, filter_buf, size)) {
 925        return size;
 926    }
 927
 928    if (e1000x_vlan_enabled(s->mac_reg) &&
 929        e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
 930        vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
 931        iov_ofs = 4;
 932        if (filter_buf == iov->iov_base) {
 933            memmove(filter_buf + 4, filter_buf, 12);
 934        } else {
 935            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
 936            while (iov->iov_len <= iov_ofs) {
 937                iov_ofs -= iov->iov_len;
 938                iov++;
 939            }
 940        }
 941        vlan_status = E1000_RXD_STAT_VP;
 942        size -= 4;
 943    }
 944
 945    rdh_start = s->mac_reg[RDH];
 946    desc_offset = 0;
 947    total_size = size + e1000x_fcs_len(s->mac_reg);
 948    if (!e1000_has_rxbufs(s, total_size)) {
 949        e1000_receiver_overrun(s, total_size);
 950        return -1;
 951    }
 952    do {
 953        desc_size = total_size - desc_offset;
 954        if (desc_size > s->rxbuf_size) {
 955            desc_size = s->rxbuf_size;
 956        }
 957        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
 958        pci_dma_read(d, base, &desc, sizeof(desc));
 959        desc.special = vlan_special;
 960        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
 961        if (desc.buffer_addr) {
 962            if (desc_offset < size) {
 963                size_t iov_copy;
 964                hwaddr ba = le64_to_cpu(desc.buffer_addr);
 965                size_t copy_size = size - desc_offset;
 966                if (copy_size > s->rxbuf_size) {
 967                    copy_size = s->rxbuf_size;
 968                }
 969                do {
 970                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
 971                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
 972                    copy_size -= iov_copy;
 973                    ba += iov_copy;
 974                    iov_ofs += iov_copy;
 975                    if (iov_ofs == iov->iov_len) {
 976                        iov++;
 977                        iov_ofs = 0;
 978                    }
 979                } while (copy_size);
 980            }
 981            desc_offset += desc_size;
 982            desc.length = cpu_to_le16(desc_size);
 983            if (desc_offset >= total_size) {
 984                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
 985            } else {
 986                /* Guest zeroing out status is not a hardware requirement.
 987                   Clear EOP in case guest didn't do it. */
 988                desc.status &= ~E1000_RXD_STAT_EOP;
 989            }
 990        } else { // as per intel docs; skip descriptors with null buf addr
 991            DBGOUT(RX, "Null RX descriptor!!\n");
 992        }
 993        pci_dma_write(d, base, &desc, sizeof(desc));
 994
 995        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
 996            s->mac_reg[RDH] = 0;
 997        /* see comment in start_xmit; same here */
 998        if (s->mac_reg[RDH] == rdh_start ||
 999            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
1000            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1001                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1002            e1000_receiver_overrun(s, total_size);
1003            return -1;
1004        }
1005    } while (desc_offset < total_size);
1006
1007    e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
1008
1009    n = E1000_ICS_RXT0;
1010    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1011        rdt += s->mac_reg[RDLEN] / sizeof(desc);
1012    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1013        s->rxbuf_min_shift)
1014        n |= E1000_ICS_RXDMT0;
1015
1016    set_ics(s, 0, n);
1017
1018    return size;
1019}
1020
1021static ssize_t
1022e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1023{
1024    const struct iovec iov = {
1025        .iov_base = (uint8_t *)buf,
1026        .iov_len = size
1027    };
1028
1029    return e1000_receive_iov(nc, &iov, 1);
1030}
1031
1032static uint32_t
1033mac_readreg(E1000State *s, int index)
1034{
1035    return s->mac_reg[index];
1036}
1037
1038static uint32_t
1039mac_low4_read(E1000State *s, int index)
1040{
1041    return s->mac_reg[index] & 0xf;
1042}
1043
1044static uint32_t
1045mac_low11_read(E1000State *s, int index)
1046{
1047    return s->mac_reg[index] & 0x7ff;
1048}
1049
1050static uint32_t
1051mac_low13_read(E1000State *s, int index)
1052{
1053    return s->mac_reg[index] & 0x1fff;
1054}
1055
1056static uint32_t
1057mac_low16_read(E1000State *s, int index)
1058{
1059    return s->mac_reg[index] & 0xffff;
1060}
1061
1062static uint32_t
1063mac_icr_read(E1000State *s, int index)
1064{
1065    uint32_t ret = s->mac_reg[ICR];
1066
1067    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1068    set_interrupt_cause(s, 0, 0);
1069    return ret;
1070}
1071
1072static uint32_t
1073mac_read_clr4(E1000State *s, int index)
1074{
1075    uint32_t ret = s->mac_reg[index];
1076
1077    s->mac_reg[index] = 0;
1078    return ret;
1079}
1080
1081static uint32_t
1082mac_read_clr8(E1000State *s, int index)
1083{
1084    uint32_t ret = s->mac_reg[index];
1085
1086    s->mac_reg[index] = 0;
1087    s->mac_reg[index-1] = 0;
1088    return ret;
1089}
1090
1091static void
1092mac_writereg(E1000State *s, int index, uint32_t val)
1093{
1094    uint32_t macaddr[2];
1095
1096    s->mac_reg[index] = val;
1097
1098    if (index == RA + 1) {
1099        macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1100        macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1101        qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1102    }
1103}
1104
1105static void
1106set_rdt(E1000State *s, int index, uint32_t val)
1107{
1108    s->mac_reg[index] = val & 0xffff;
1109    if (e1000_has_rxbufs(s, 1)) {
1110        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1111    }
1112}
1113
1114static void
1115set_16bit(E1000State *s, int index, uint32_t val)
1116{
1117    s->mac_reg[index] = val & 0xffff;
1118}
1119
1120static void
1121set_dlen(E1000State *s, int index, uint32_t val)
1122{
1123    s->mac_reg[index] = val & 0xfff80;
1124}
1125
1126static void
1127set_tctl(E1000State *s, int index, uint32_t val)
1128{
1129    s->mac_reg[index] = val;
1130    s->mac_reg[TDT] &= 0xffff;
1131    start_xmit(s);
1132}
1133
1134static void
1135set_icr(E1000State *s, int index, uint32_t val)
1136{
1137    DBGOUT(INTERRUPT, "set_icr %x\n", val);
1138    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1139}
1140
1141static void
1142set_imc(E1000State *s, int index, uint32_t val)
1143{
1144    s->mac_reg[IMS] &= ~val;
1145    set_ics(s, 0, 0);
1146}
1147
1148static void
1149set_ims(E1000State *s, int index, uint32_t val)
1150{
1151    s->mac_reg[IMS] |= val;
1152    set_ics(s, 0, 0);
1153}
1154
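/*
 * MMIO register dispatch tables: reads and writes of the register space
 * are routed through macreg_readops[]/macreg_writeops[], indexed by the
 * register offset divided by four.
 */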
1155#define getreg(x)    [x] = mac_readreg
1156typedef uint32_t (*readops)(E1000State *, int);
1157static const readops macreg_readops[] = {
1158    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1159    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1160    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1161    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1162    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1163    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1164    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1165    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1166    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1167    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1168    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
1169    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1170    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1171    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
1172    getreg(GOTCL),
1173
1174    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1175    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
1176    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
1177    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
1178    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
1179    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
1180    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
1181    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
1182    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1183    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1184    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
1185    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
1186    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
1187    [MPTC]    = mac_read_clr4,
1188    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1189    [EERD]    = flash_eerd_read,
1190    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
1191    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
1192    [RDFPC]   = mac_low13_read,
1193    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
1194    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
1195    [TDFPC]   = mac_low13_read,
1196    [AIT]     = mac_low16_read,
1197
1198    [CRCERRS ... MPC]   = &mac_readreg,
1199    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
1200    [FFLT ... FFLT+6]   = &mac_low11_read,
1201    [RA ... RA+31]      = &mac_readreg,
1202    [WUPM ... WUPM+31]  = &mac_readreg,
1203    [MTA ... MTA+127]   = &mac_readreg,
1204    [VFTA ... VFTA+127] = &mac_readreg,
1205    [FFMT ... FFMT+254] = &mac_low4_read,
1206    [FFVT ... FFVT+254] = &mac_readreg,
1207    [PBM ... PBM+16383] = &mac_readreg,
1208};
1209enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1210
1211#define putreg(x)    [x] = mac_writereg
1212typedef void (*writeops)(E1000State *, int, uint32_t);
1213static const writeops macreg_writeops[] = {
1214    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1215    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1216    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1217    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
1218    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
1219    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
1220    putreg(WUS),      putreg(AIT),
1221
1222    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
1223    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
1224    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
1225    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
1226    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
1227    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
1228    [ITR]    = set_16bit,
1229
1230    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
1231    [FFLT ... FFLT+6]   = &mac_writereg,
1232    [RA ... RA+31]      = &mac_writereg,
1233    [WUPM ... WUPM+31]  = &mac_writereg,
1234    [MTA ... MTA+127]   = &mac_writereg,
1235    [VFTA ... VFTA+127] = &mac_writereg,
1236    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
1237    [PBM ... PBM+16383] = &mac_writereg,
1238};
1239
1240enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1241
1242enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1243
1244#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
1245/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1246 * f - flag bits (up to 6 possible flags)
1247 * n - flag needed
 1248 * p - partially implemented */
1249static const uint8_t mac_reg_access[0x8000] = {
1250    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
1251    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),
1252
1253    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
1254    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
1255    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
1256    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
1257    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
1258    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
1259    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
1260    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
1261    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
1262    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
1263    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
1264    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
1265    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
1266    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
1267    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
1268    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
1269    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
1270    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
1271    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
1272    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
1273    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
1274    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
1275    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
1276    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
1277    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
1278    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
1279    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
1280    [BPTC]    = markflag(MAC),
1281
1282    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1283    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1284    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1285    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1286    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1287    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1288    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1289    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1290    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1291    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1292    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
1293};
1294
1295static void
1296e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1297                 unsigned size)
1298{
1299    E1000State *s = opaque;
1300    unsigned int index = (addr & 0x1ffff) >> 2;
1301
1302    if (index < NWRITEOPS && macreg_writeops[index]) {
1303        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1304            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1305            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1306                DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1307                       "It is not fully implemented.\n", index<<2);
1308            }
1309            macreg_writeops[index](s, index, val);
1310        } else {    /* "flag needed" bit is set, but the flag is not active */
1311            DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1312                   index<<2);
1313        }
1314    } else if (index < NREADOPS && macreg_readops[index]) {
1315        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1316               index<<2, val);
1317    } else {
1318        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1319               index<<2, val);
1320    }
1321}
1322
1323static uint64_t
1324e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1325{
1326    E1000State *s = opaque;
1327    unsigned int index = (addr & 0x1ffff) >> 2;
1328
1329    if (index < NREADOPS && macreg_readops[index]) {
1330        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1331            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1332            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1333                DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1334                       "It is not fully implemented.\n", index<<2);
1335            }
1336            return macreg_readops[index](s, index);
1337        } else {    /* "flag needed" bit is set, but the flag is not active */
1338            DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1339                   index<<2);
1340        }
1341    } else {
1342        DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1343    }
1344    return 0;
1345}
1346
1347static const MemoryRegionOps e1000_mmio_ops = {
1348    .read = e1000_mmio_read,
1349    .write = e1000_mmio_write,
1350    .endianness = DEVICE_LITTLE_ENDIAN,
1351    .impl = {
1352        .min_access_size = 4,
1353        .max_access_size = 4,
1354    },
1355};
1356
1357static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1358                              unsigned size)
1359{
1360    E1000State *s = opaque;
1361
1362    (void)s;
1363    return 0;
1364}
1365
1366static void e1000_io_write(void *opaque, hwaddr addr,
1367                           uint64_t val, unsigned size)
1368{
1369    E1000State *s = opaque;
1370
1371    (void)s;
1372}
1373
1374static const MemoryRegionOps e1000_io_ops = {
1375    .read = e1000_io_read,
1376    .write = e1000_io_write,
1377    .endianness = DEVICE_LITTLE_ENDIAN,
1378};
1379
1380static bool is_version_1(void *opaque, int version_id)
1381{
1382    return version_id == 1;
1383}
1384
1385static int e1000_pre_save(void *opaque)
1386{
1387    E1000State *s = opaque;
1388    NetClientState *nc = qemu_get_queue(s->nic);
1389
1390    /*
1391     * If link is down and auto-negotiation is supported and ongoing,
1392     * complete auto-negotiation immediately. This allows us to look
1393     * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1394     */
1395    if (nc->link_down && have_autoneg(s)) {
1396        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1397    }
1398
1399    /* Decide which set of props to migrate in the main structure */
1400    if (chkflag(TSO) || !s->use_tso_for_migration) {
 1401        /* Either we're migrating with the extra subsection, in which
 1402         * case mig_props is always 'props', or we don't have the
 1403         * subsection but 'props' was the last one updated.  Either
 1404         * way, 'props' is what we migrate.
 1405         */
1406        s->mig_props = s->tx.props;
1407    } else {
 1408        /* We're not using the subsection, and 'tso_props' was
 1409         * the last one updated.
1410         */
1411        s->mig_props = s->tx.tso_props;
1412    }
1413    return 0;
1414}
1415
1416static int e1000_post_load(void *opaque, int version_id)
1417{
1418    E1000State *s = opaque;
1419    NetClientState *nc = qemu_get_queue(s->nic);
1420
1421    if (!chkflag(MIT)) {
1422        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1423            s->mac_reg[TADV] = 0;
1424        s->mit_irq_level = false;
1425    }
1426    s->mit_ide = 0;
1427    s->mit_timer_on = true;
1428    timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1429
1430    /* nc.link_down can't be migrated, so infer link_down according
1431     * to link status bit in mac_reg[STATUS].
1432     * Alternatively, restart link negotiation if it was in progress. */
1433    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1434
1435    if (have_autoneg(s) &&
1436        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1437        nc->link_down = false;
1438        timer_mod(s->autoneg_timer,
1439                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1440    }
1441
1442    s->tx.props = s->mig_props;
1443    if (!s->received_tx_tso) {
1444        /* We received only one set of offload data (tx.props)
1445         * and haven't got tx.tso_props.  The best we can do
1446         * is dupe the data.
1447         */
1448        s->tx.tso_props = s->mig_props;
1449    }
1450    return 0;
1451}
1452
1453static int e1000_tx_tso_post_load(void *opaque, int version_id)
1454{
1455    E1000State *s = opaque;
1456    s->received_tx_tso = true;
1457    return 0;
1458}
1459
1460static bool e1000_mit_state_needed(void *opaque)
1461{
1462    E1000State *s = opaque;
1463
1464    return chkflag(MIT);
1465}
1466
1467static bool e1000_full_mac_needed(void *opaque)
1468{
1469    E1000State *s = opaque;
1470
1471    return chkflag(MAC);
1472}
1473
1474static bool e1000_tso_state_needed(void *opaque)
1475{
1476    E1000State *s = opaque;
1477
1478    return chkflag(TSO);
1479}
1480
1481static const VMStateDescription vmstate_e1000_mit_state = {
1482    .name = "e1000/mit_state",
1483    .version_id = 1,
1484    .minimum_version_id = 1,
1485    .needed = e1000_mit_state_needed,
1486    .fields = (VMStateField[]) {
1487        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1488        VMSTATE_UINT32(mac_reg[RADV], E1000State),
1489        VMSTATE_UINT32(mac_reg[TADV], E1000State),
1490        VMSTATE_UINT32(mac_reg[ITR], E1000State),
1491        VMSTATE_BOOL(mit_irq_level, E1000State),
1492        VMSTATE_END_OF_LIST()
1493    }
1494};
1495
1496static const VMStateDescription vmstate_e1000_full_mac_state = {
1497    .name = "e1000/full_mac_state",
1498    .version_id = 1,
1499    .minimum_version_id = 1,
1500    .needed = e1000_full_mac_needed,
1501    .fields = (VMStateField[]) {
1502        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
1503        VMSTATE_END_OF_LIST()
1504    }
1505};
1506
1507static const VMStateDescription vmstate_e1000_tx_tso_state = {
1508    .name = "e1000/tx_tso_state",
1509    .version_id = 1,
1510    .minimum_version_id = 1,
1511    .needed = e1000_tso_state_needed,
1512    .post_load = e1000_tx_tso_post_load,
1513    .fields = (VMStateField[]) {
1514        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
1515        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
1516        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
1517        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
1518        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
1519        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
1520        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
1521        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
1522        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
1523        VMSTATE_INT8(tx.tso_props.ip, E1000State),
1524        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
1525        VMSTATE_END_OF_LIST()
1526    }
1527};
1528
1529static const VMStateDescription vmstate_e1000 = {
1530    .name = "e1000",
1531    .version_id = 2,
1532    .minimum_version_id = 1,
1533    .pre_save = e1000_pre_save,
1534    .post_load = e1000_post_load,
1535    .fields = (VMStateField[]) {
1536        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1537        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1538        VMSTATE_UNUSED(4), /* Was mmio_base.  */
1539        VMSTATE_UINT32(rxbuf_size, E1000State),
1540        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1541        VMSTATE_UINT32(eecd_state.val_in, E1000State),
1542        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1543        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1544        VMSTATE_UINT16(eecd_state.reading, E1000State),
1545        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1546        VMSTATE_UINT8(mig_props.ipcss, E1000State),
1547        VMSTATE_UINT8(mig_props.ipcso, E1000State),
1548        VMSTATE_UINT16(mig_props.ipcse, E1000State),
1549        VMSTATE_UINT8(mig_props.tucss, E1000State),
1550        VMSTATE_UINT8(mig_props.tucso, E1000State),
1551        VMSTATE_UINT16(mig_props.tucse, E1000State),
1552        VMSTATE_UINT32(mig_props.paylen, E1000State),
1553        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
1554        VMSTATE_UINT16(mig_props.mss, E1000State),
1555        VMSTATE_UINT16(tx.size, E1000State),
1556        VMSTATE_UINT16(tx.tso_frames, E1000State),
1557        VMSTATE_UINT8(tx.sum_needed, E1000State),
1558        VMSTATE_INT8(mig_props.ip, E1000State),
1559        VMSTATE_INT8(mig_props.tcp, E1000State),
1560        VMSTATE_BUFFER(tx.header, E1000State),
1561        VMSTATE_BUFFER(tx.data, E1000State),
1562        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1563        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1564        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1565        VMSTATE_UINT32(mac_reg[EECD], E1000State),
1566        VMSTATE_UINT32(mac_reg[EERD], E1000State),
1567        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1568        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1569        VMSTATE_UINT32(mac_reg[ICR], E1000State),
1570        VMSTATE_UINT32(mac_reg[ICS], E1000State),
1571        VMSTATE_UINT32(mac_reg[IMC], E1000State),
1572        VMSTATE_UINT32(mac_reg[IMS], E1000State),
1573        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1574        VMSTATE_UINT32(mac_reg[MANC], E1000State),
1575        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1576        VMSTATE_UINT32(mac_reg[MPC], E1000State),
1577        VMSTATE_UINT32(mac_reg[PBA], E1000State),
1578        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1579        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1580        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1581        VMSTATE_UINT32(mac_reg[RDH], E1000State),
1582        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1583        VMSTATE_UINT32(mac_reg[RDT], E1000State),
1584        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1585        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1586        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1587        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1588        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1589        VMSTATE_UINT32(mac_reg[TDH], E1000State),
1590        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1591        VMSTATE_UINT32(mac_reg[TDT], E1000State),
1592        VMSTATE_UINT32(mac_reg[TORH], E1000State),
1593        VMSTATE_UINT32(mac_reg[TORL], E1000State),
1594        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1595        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1596        VMSTATE_UINT32(mac_reg[TPR], E1000State),
1597        VMSTATE_UINT32(mac_reg[TPT], E1000State),
1598        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1599        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1600        VMSTATE_UINT32(mac_reg[VET], E1000State),
1601        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1602        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1603        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1604        VMSTATE_END_OF_LIST()
1605    },
1606    .subsections = (const VMStateDescription*[]) {
1607        &vmstate_e1000_mit_state,
1608        &vmstate_e1000_full_mac_state,
1609        &vmstate_e1000_tx_tso_state,
1610        NULL
1611    }
1612};
1613
1614/*
1615 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1616 * Note: A valid DevId will be inserted during pci_e1000_realize().
1617 */
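/*
 * e1000x_core_prepare_eeprom() also fills in the MAC address (words
 * 0x00-0x02) and stores a checksum in the last word so that the 16-bit
 * words 0x00-0x3F sum to EEPROM_SUM (0xBABA), which is what guest drivers
 * verify before trusting the EEPROM contents.
 */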
1618static const uint16_t e1000_eeprom_template[64] = {
1619    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1620    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1621    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1622    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1623    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1624    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1625    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1626    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1627};
1628
1629/* PCI interface */
1630
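/*
 * Register the MMIO BAR contents.  Most of the 128KB register window is
 * marked for coalesced MMIO, so KVM can batch guest writes and deliver
 * them lazily; registers with immediate side effects (MDIC, the interrupt
 * registers, TCTL, TDT) are left out so each access traps to QEMU right
 * away.  PNPMMIO_SIZE terminates the excluded_regs list.
 */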
1631static void
1632e1000_mmio_setup(E1000State *d)
1633{
1634    int i;
1635    const uint32_t excluded_regs[] = {
1636        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1637        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1638    };
1639
1640    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1641                          "e1000-mmio", PNPMMIO_SIZE);
1642    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1643    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1644        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1645                                     excluded_regs[i+1] - excluded_regs[i] - 4);
1646    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1647}
1648
1649static void
1650pci_e1000_uninit(PCIDevice *dev)
1651{
1652    E1000State *d = E1000(dev);
1653
1654    timer_free(d->autoneg_timer);
1655    timer_free(d->mit_timer);
1656    timer_free(d->flush_queue_timer);
1657    qemu_del_nic(d->nic);
1658}
1659
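/*
 * Callbacks the net core uses for this NIC: can_receive() provides RX
 * flow control, receive()/receive_iov() push incoming frames into the
 * emulated RX ring, and link_status_changed() propagates backend link
 * state into the guest-visible STATUS and PHY bits.
 */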
1660static NetClientInfo net_e1000_info = {
1661    .type = NET_CLIENT_DRIVER_NIC,
1662    .size = sizeof(NICState),
1663    .can_receive = e1000_can_receive,
1664    .receive = e1000_receive,
1665    .receive_iov = e1000_receive_iov,
1666    .link_status_changed = e1000_set_link_status,
1667};
1668
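/*
 * Config-space write hook: frames can pile up in the backend queue while
 * bus mastering is disabled, so once the guest sets PCI_COMMAND_MASTER
 * (and RX DMA becomes possible again) any queued packets are flushed.
 */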
1669static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1670                                uint32_t val, int len)
1671{
1672    E1000State *s = E1000(pci_dev);
1673
1674    pci_default_write_config(pci_dev, address, val, len);
1675
1676    if (range_covers_byte(address, len, PCI_COMMAND) &&
1677        (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1678        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1679    }
1680}
1681
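/*
 * realize(): set up config space, expose the registers as BAR 0 (MMIO)
 * and BAR 1 (I/O), build the EEPROM image from the template plus the
 * configured MAC address and device ID, create the NIC backend, and
 * create the autonegotiation, mitigation and RX-flush timers.
 */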
1682static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1683{
1684    DeviceState *dev = DEVICE(pci_dev);
1685    E1000State *d = E1000(pci_dev);
1686    uint8_t *pci_conf;
1687    uint8_t *macaddr;
1688
1689    pci_dev->config_write = e1000_write_config;
1690
1691    pci_conf = pci_dev->config;
1692
1693    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1694    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1695
1696    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1697
1698    e1000_mmio_setup(d);
1699
1700    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1701
1702    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1703
1704    qemu_macaddr_default_if_unset(&d->conf.macaddr);
1705    macaddr = d->conf.macaddr.a;
1706
1707    e1000x_core_prepare_eeprom(d->eeprom_data,
1708                               e1000_eeprom_template,
1709                               sizeof(e1000_eeprom_template),
1710                               PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1711                               macaddr);
1712
1713    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1714                          object_get_typename(OBJECT(d)), dev->id, d);
1715
1716    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1717
1718    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1719    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1720    d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1721                                        e1000_flush_queue_timer, d);
1722}
1723
1724static void qdev_e1000_reset(DeviceState *dev)
1725{
1726    E1000State *d = E1000(dev);
1727    e1000_reset(d);
1728}
1729
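/*
 * The four flag properties below exist for cross-version migration: each
 * one guards behaviour (and migration state) that older QEMU versions
 * lack, and machine-type compat code or the user can switch it off, e.g.
 *   -device e1000,migrate_tso_props=off
 */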
1730static Property e1000_properties[] = {
1731    DEFINE_NIC_PROPERTIES(E1000State, conf),
1732    DEFINE_PROP_BIT("autonegotiation", E1000State,
1733                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1734    DEFINE_PROP_BIT("mitigation", E1000State,
1735                    compat_flags, E1000_FLAG_MIT_BIT, true),
1736    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1737                    compat_flags, E1000_FLAG_MAC_BIT, true),
1738    DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1739                    compat_flags, E1000_FLAG_TSO_BIT, true),
1740    DEFINE_PROP_END_OF_LIST(),
1741};
1742
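/*
 * Per-model parameters.  e1000_register_types() registers one concrete
 * QOM type for every entry of e1000_devices[], and e1000_class_init()
 * copies these values into the PCIDeviceClass and E1000BaseClass.
 */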
1743typedef struct E1000Info {
1744    const char *name;
1745    uint16_t   device_id;
1746    uint8_t    revision;
1747    uint16_t   phy_id2;
1748} E1000Info;
1749
1750static void e1000_class_init(ObjectClass *klass, void *data)
1751{
1752    DeviceClass *dc = DEVICE_CLASS(klass);
1753    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1754    E1000BaseClass *e = E1000_CLASS(klass);
1755    const E1000Info *info = data;
1756
1757    k->realize = pci_e1000_realize;
1758    k->exit = pci_e1000_uninit;
1759    k->romfile = "efi-e1000.rom";
1760    k->vendor_id = PCI_VENDOR_ID_INTEL;
1761    k->device_id = info->device_id;
1762    k->revision = info->revision;
1763    e->phy_id2 = info->phy_id2;
1764    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1765    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1766    dc->desc = "Intel Gigabit Ethernet";
1767    dc->reset = qdev_e1000_reset;
1768    dc->vmsd = &vmstate_e1000;
1769    device_class_set_props(dc, e1000_properties);
1770}
1771
1772static void e1000_instance_init(Object *obj)
1773{
1774    E1000State *n = E1000(obj);
1775    device_add_bootindex_property(obj, &n->conf.bootindex,
1776                                  "bootindex", "/ethernet-phy@0",
1777                                  DEVICE(n));
1778}
1779
1780static const TypeInfo e1000_base_info = {
1781    .name          = TYPE_E1000_BASE,
1782    .parent        = TYPE_PCI_DEVICE,
1783    .instance_size = sizeof(E1000State),
1784    .instance_init = e1000_instance_init,
1785    .class_size    = sizeof(E1000BaseClass),
1786    .abstract      = true,
1787    .interfaces = (InterfaceInfo[]) {
1788        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1789        { },
1790    },
1791};
1792
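/*
 * Concrete models.  "e1000" (the 82540EM) is the variant guests most
 * commonly expect and has long been the default NIC for PC machine types;
 * the other two are offered for guests whose drivers want those specific
 * device IDs.
 */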
1793static const E1000Info e1000_devices[] = {
1794    {
1795        .name      = "e1000",
1796        .device_id = E1000_DEV_ID_82540EM,
1797        .revision  = 0x03,
1798        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1799    },
1800    {
1801        .name      = "e1000-82544gc",
1802        .device_id = E1000_DEV_ID_82544GC_COPPER,
1803        .revision  = 0x03,
1804        .phy_id2   = E1000_PHY_ID2_82544x,
1805    },
1806    {
1807        .name      = "e1000-82545em",
1808        .device_id = E1000_DEV_ID_82545EM_COPPER,
1809        .revision  = 0x03,
1810        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1811    },
1812};
1813
1814static void e1000_register_types(void)
1815{
1816    int i;
1817
1818    type_register_static(&e1000_base_info);
1819    for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1820        const E1000Info *info = &e1000_devices[i];
1821        TypeInfo type_info = {};
1822
1823        type_info.name = info->name;
1824        type_info.parent = TYPE_E1000_BASE;
1825        type_info.class_data = (void *)info;
1826        type_info.class_init = e1000_class_init;
1827
1828        type_register(&type_info);
1829    }
1830}
1831
1832type_init(e1000_register_types)
1833