qemu/hw/net/e1000.c
<<
>>
Prefs
   1/*
   2 * QEMU e1000 emulation
   3 *
   4 * Software developer's manual:
   5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
   6 *
   7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
   8 * Copyright (c) 2008 Qumranet
   9 * Based on work done by:
  10 * Copyright (c) 2007 Dan Aloni
  11 * Copyright (c) 2004 Antony T Curtis
  12 *
  13 * This library is free software; you can redistribute it and/or
  14 * modify it under the terms of the GNU Lesser General Public
  15 * License as published by the Free Software Foundation; either
  16 * version 2.1 of the License, or (at your option) any later version.
  17 *
  18 * This library is distributed in the hope that it will be useful,
  19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 * Lesser General Public License for more details.
  22 *
  23 * You should have received a copy of the GNU Lesser General Public
  24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  25 */
  26
  27
  28#include "qemu/osdep.h"
  29#include "hw/pci/pci.h"
  30#include "hw/qdev-properties.h"
  31#include "migration/vmstate.h"
  32#include "net/eth.h"
  33#include "net/net.h"
  34#include "net/checksum.h"
  35#include "sysemu/sysemu.h"
  36#include "sysemu/dma.h"
  37#include "qemu/iov.h"
  38#include "qemu/module.h"
  39#include "qemu/range.h"
  40
  41#include "e1000x_common.h"
  42#include "trace.h"
  43#include "qom/object.h"
  44
     /* Ethernet broadcast address; matched against a frame's first six bytes. */
   45static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
   46
     /* Uncomment to compile in the DBGOUT() diagnostics defined below. */
   47/* #define E1000_DEBUG */
   48
   49#ifdef E1000_DEBUG
     /* Debug categories; each enumerator is a bit index into debugflags. */
   50enum {
   51    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
   52    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
   53    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
   54    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
   55};
     /* Bit mask for debug category x (x is the name without the DEBUG_ prefix). */
   56#define DBGBIT(x)    (1<<DEBUG_##x)
     /* Categories enabled by default: TX errors and general messages. */
   57static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
   58
     /* Print to stderr, prefixed with "e1000: ", if category `what` is enabled. */
   59#define DBGOUT(what, fmt, ...) do { \
   60    if (debugflags & DBGBIT(what)) \
   61        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
   62    } while (0)
   63#else
     /* Debugging disabled: DBGOUT() expands to an empty statement. */
   64#define DBGOUT(what, fmt, ...) do {} while (0)
   65#endif
   66
     /*
      * NOTE(review): IOPORT_SIZE / PNPMMIO_SIZE are presumably the sizes of the
      * I/O-port and MMIO regions; their users are outside this chunk - confirm.
      */
   67#define IOPORT_SIZE       0x40
   68#define PNPMMIO_SIZE      0x20000
   69#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
   70
     /* Ethernet header (14 bytes) plus one 4-byte VLAN tag. */
   71#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
  72
  73/*
  74 * HW models:
  75 *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
  76 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
  77 *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
  78 *  Others never tested
  79 */
  80
     /*
      * Per-device state of the emulated e1000 NIC: the PCI parent object, the
      * register files (MAC, PHY, EEPROM), the transmit context that is built up
      * across descriptors, the bit-banged EEPROM interface state, the timers and
      * the migration-compatibility flags.
      */
   81struct E1000State_st {
   82    /*< private >*/
   83    PCIDevice parent_obj;
   84    /*< public >*/
   85
   86    NICState *nic;
   87    NICConf conf;
   88    MemoryRegion mmio;
   89    MemoryRegion io;
   90
         /* Register files; e1000_reset() reloads them from the *_init tables. */
   91    uint32_t mac_reg[0x8000];
   92    uint16_t phy_reg[0x20];
   93    uint16_t eeprom_data[64];
   94
         /* Both values are derived from RCTL by set_rx_control(). */
   95    uint32_t rxbuf_size;
   96    uint32_t rxbuf_min_shift;
         /* Transmit context accumulated by process_tx_desc() for one packet. */
   97    struct e1000_tx {
             /* Copy of the first hdr_len bytes of data[], reused for each TSO segment. */
   98        unsigned char header[256];
             /* VLAN tag (VET + descriptor "special" field) inserted by xmit_seg(). */
   99        unsigned char vlan_header[4];
  100        /* Fields vlan and data must not be reordered or separated. */
  101        unsigned char vlan[4];
  102        unsigned char data[0x10000];
             /* Number of valid bytes currently held in data[]. */
  103        uint16_t size;
  104        unsigned char vlan_needed;
             /* Checksum-offload option bits latched from the first data descriptor. */
  105        unsigned char sum_needed;
             /* True while the current descriptor requests TCP segmentation (TSE). */
  106        bool cptse;
             /* Offload parameters from non-TSE and TSE context descriptors. */
  107        e1000x_txd_props props;
  108        e1000x_txd_props tso_props;
             /* Count of TSO segments already emitted for the current packet. */
  109        uint16_t tso_frames;
             /* Set while start_xmit() is running; blocks re-entry. */
  110        bool busy;
  111    } tx;
  112
         /* State of the bit-banged EEPROM interface, see set_eecd()/get_eecd(). */
  113    struct {
  114        uint32_t val_in;    /* shifted in from guest driver */
  115        uint16_t bitnum_in;
  116        uint16_t bitnum_out;
  117        uint16_t reading;
  118        uint32_t old_eecd;
  119    } eecd_state;
  120
  121    QEMUTimer *autoneg_timer;
  122
  123    QEMUTimer *mit_timer;      /* Mitigation timer. */
  124    bool mit_timer_on;         /* Mitigation timer is running. */
  125    bool mit_irq_level;        /* Tracks interrupt pin level. */
  126    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
  127
         /* While pending, e1000_can_receive() reports the NIC as not ready. */
  128    QEMUTimer *flush_queue_timer;
  129
  130/* Compatibility flags for migration to/from qemu 1.3.0 and older */
  131#define E1000_FLAG_AUTONEG_BIT 0
  132#define E1000_FLAG_MIT_BIT 1
  133#define E1000_FLAG_MAC_BIT 2
  134#define E1000_FLAG_TSO_BIT 3
  135#define E1000_FLAG_VET_BIT 4
  136#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
  137#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
  138#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
  139#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
  140#define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)
  141
         /* Bitwise OR of the E1000_FLAG_* values above; tested with chkflag(). */
  142    uint32_t compat_flags;
  143    bool received_tx_tso;
         /* Records whether the most recent context descriptor had TSE set. */
  144    bool use_tso_for_migration;
  145    e1000x_txd_props mig_props;
  146};
  147typedef struct E1000State_st E1000State;
 148
     /*
      * Test compatibility flag E1000_FLAG_<x>. Expands to an expression that
      * uses an E1000State pointer named `s` from the enclosing scope; the result
      * is non-zero when the flag is set.
      */
  149#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
 150
     /*
      * Class-level data for e1000 device models. phy_id2 is the value that
      * e1000_reset() stores into the PHY_ID2 register.
      */
  151struct E1000BaseClass {
  152    PCIDeviceClass parent_class;
  153    uint16_t phy_id2;
  154};
  155typedef struct E1000BaseClass E1000BaseClass;
 156
     /* QOM type name of the e1000 base type. */
  157#define TYPE_E1000_BASE "e1000-base"
  158
     /*
      * NOTE(review): presumably declares the instance/class cast helpers for
      * this type; e1000_reset() in this file uses E1000_GET_CLASS() - confirm
      * against qom/object.h.
      */
  159DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
  160                     E1000, TYPE_E1000_BASE)
 161
 162
 163static void
 164e1000_link_up(E1000State *s)
 165{
 166    e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
 167
 168    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 169    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 170}
 171
 172static void
 173e1000_autoneg_done(E1000State *s)
 174{
 175    e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
 176
 177    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 178    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 179}
 180
 181static bool
 182have_autoneg(E1000State *s)
 183{
 184    return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
 185}
 186
 187static void
 188set_phy_ctrl(E1000State *s, int index, uint16_t val)
 189{
 190    /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
 191    s->phy_reg[PHY_CTRL] = val & ~(0x3f |
 192                                   MII_CR_RESET |
 193                                   MII_CR_RESTART_AUTO_NEG);
 194
 195    /*
 196     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
 197     * migrate during auto negotiation, after migration the link will be
 198     * down.
 199     */
 200    if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
 201        e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
 202    }
 203}
 204
     /*
      * Per-register PHY write handlers, indexed by PHY register number.
      * set_mdic() calls the handler when one exists and otherwise stores the
      * written value directly into phy_reg[].
      */
  205static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
  206    [PHY_CTRL] = set_phy_ctrl,
  207};
  208
     /* Number of entries in phyreg_writeops; used as a bounds check by set_mdic(). */
  209enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
 210
 211enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
 212static const char phy_regcap[0x20] = {
 213    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
 214    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
 215    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
 216    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
 217    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
 218    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
 219    [PHY_AUTONEG_EXP] = PHY_R,
 220};
 221
     /*
      * Reset values of the PHY registers. e1000_reset() zeroes phy_reg[] and
      * then copies this table over it, so registers not listed here reset to 0.
      */
  222/* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
  223static const uint16_t phy_reg_init[] = {
  224    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
  225                   MII_CR_FULL_DUPLEX |
  226                   MII_CR_AUTO_NEG_EN,
  227
  228    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
  229                   MII_SR_LINK_STATUS |   /* link initially up */
  230                   MII_SR_AUTONEG_CAPS |
  231                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
  232                   MII_SR_PREAMBLE_SUPPRESS |
  233                   MII_SR_EXTENDED_STATUS |
  234                   MII_SR_10T_HD_CAPS |
  235                   MII_SR_10T_FD_CAPS |
  236                   MII_SR_100X_HD_CAPS |
  237                   MII_SR_100X_FD_CAPS,
  238
  239    [PHY_ID1] = 0x141,
  240    /* [PHY_ID2] configured per DevId, from e1000_reset() */
  241    [PHY_AUTONEG_ADV] = 0xde1,
  242    [PHY_LP_ABILITY] = 0x1e0,
  243    [PHY_1000T_CTRL] = 0x0e00,
  244    [PHY_1000T_STATUS] = 0x3c00,
  245    [M88E1000_PHY_SPEC_CTRL] = 0x360,
  246    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
  247    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
  248};
 249
     /*
      * Reset values of the MAC registers. e1000_reset() zeroes mac_reg[] and
      * then copies this table over it, so registers not listed here reset to 0.
      * The STATUS value includes E1000_STATUS_LU, i.e. the link starts up.
      */
  250static const uint32_t mac_reg_init[] = {
  251    [PBA]     = 0x00100030,
  252    [LEDCTL]  = 0x602,
  253    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
  254                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
  255    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
  256                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
  257                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
  258                E1000_STATUS_LU,
  259    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
  260                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
  261                E1000_MANC_RMCP_EN,
  262};
 263
 264/* Helper function, *curr == 0 means the value is not set */
 265static inline void
 266mit_update_delay(uint32_t *curr, uint32_t value)
 267{
 268    if (value && (*curr == 0 || value < *curr)) {
 269        *curr = value;
 270    }
 271}
 272
 273static void
 274set_interrupt_cause(E1000State *s, int index, uint32_t val)
 275{
 276    PCIDevice *d = PCI_DEVICE(s);
 277    uint32_t pending_ints;
 278    uint32_t mit_delay;
 279
 280    s->mac_reg[ICR] = val;
 281
 282    /*
 283     * Make sure ICR and ICS registers have the same value.
 284     * The spec says that the ICS register is write-only.  However in practice,
 285     * on real hardware ICS is readable, and for reads it has the same value as
 286     * ICR (except that ICS does not have the clear on read behaviour of ICR).
 287     *
 288     * The VxWorks PRO/1000 driver uses this behaviour.
 289     */
 290    s->mac_reg[ICS] = val;
 291
 292    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
 293    if (!s->mit_irq_level && pending_ints) {
 294        /*
 295         * Here we detect a potential raising edge. We postpone raising the
 296         * interrupt line if we are inside the mitigation delay window
 297         * (s->mit_timer_on == 1).
 298         * We provide a partial implementation of interrupt mitigation,
 299         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
 300         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
 301         * RADV; relative timers based on TIDV and RDTR are not implemented.
 302         */
 303        if (s->mit_timer_on) {
 304            return;
 305        }
 306        if (chkflag(MIT)) {
 307            /* Compute the next mitigation delay according to pending
 308             * interrupts and the current values of RADV (provided
 309             * RDTR!=0), TADV and ITR.
 310             * Then rearm the timer.
 311             */
 312            mit_delay = 0;
 313            if (s->mit_ide &&
 314                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
 315                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
 316            }
 317            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
 318                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
 319            }
 320            mit_update_delay(&mit_delay, s->mac_reg[ITR]);
 321
 322            /*
 323             * According to e1000 SPEC, the Ethernet controller guarantees
 324             * a maximum observable interrupt rate of 7813 interrupts/sec.
 325             * Thus if mit_delay < 500 then the delay should be set to the
 326             * minimum delay possible which is 500.
 327             */
 328            mit_delay = (mit_delay < 500) ? 500 : mit_delay;
 329
 330            s->mit_timer_on = 1;
 331            timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 332                      mit_delay * 256);
 333            s->mit_ide = 0;
 334        }
 335    }
 336
 337    s->mit_irq_level = (pending_ints != 0);
 338    pci_set_irq(d, s->mit_irq_level);
 339}
 340
 341static void
 342e1000_mit_timer(void *opaque)
 343{
 344    E1000State *s = opaque;
 345
 346    s->mit_timer_on = 0;
 347    /* Call set_interrupt_cause to update the irq level (if necessary). */
 348    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
 349}
 350
 351static void
 352set_ics(E1000State *s, int index, uint32_t val)
 353{
 354    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
 355        s->mac_reg[IMS]);
 356    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
 357}
 358
 359static void
 360e1000_autoneg_timer(void *opaque)
 361{
 362    E1000State *s = opaque;
 363    if (!qemu_get_queue(s->nic)->link_down) {
 364        e1000_autoneg_done(s);
 365        set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
 366    }
 367}
 368
 369static bool e1000_vet_init_need(void *opaque)
 370{
 371    E1000State *s = opaque;
 372
 373    return chkflag(VET);
 374}
 375
 376static void e1000_reset(void *opaque)
 377{
 378    E1000State *d = opaque;
 379    E1000BaseClass *edc = E1000_GET_CLASS(d);
 380    uint8_t *macaddr = d->conf.macaddr.a;
 381
 382    timer_del(d->autoneg_timer);
 383    timer_del(d->mit_timer);
 384    timer_del(d->flush_queue_timer);
 385    d->mit_timer_on = 0;
 386    d->mit_irq_level = 0;
 387    d->mit_ide = 0;
 388    memset(d->phy_reg, 0, sizeof d->phy_reg);
 389    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
 390    d->phy_reg[PHY_ID2] = edc->phy_id2;
 391    memset(d->mac_reg, 0, sizeof d->mac_reg);
 392    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
 393    d->rxbuf_min_shift = 1;
 394    memset(&d->tx, 0, sizeof d->tx);
 395
 396    if (qemu_get_queue(d->nic)->link_down) {
 397        e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
 398    }
 399
 400    e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
 401
 402    if (e1000_vet_init_need(d)) {
 403        d->mac_reg[VET] = ETH_P_VLAN;
 404    }
 405}
 406
 407static void
 408set_ctrl(E1000State *s, int index, uint32_t val)
 409{
 410    /* RST is self clearing */
 411    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
 412}
 413
 414static void
 415e1000_flush_queue_timer(void *opaque)
 416{
 417    E1000State *s = opaque;
 418
 419    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 420}
 421
 422static void
 423set_rx_control(E1000State *s, int index, uint32_t val)
 424{
 425    s->mac_reg[RCTL] = val;
 426    s->rxbuf_size = e1000x_rxbufsize(val);
 427    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
 428    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
 429           s->mac_reg[RCTL]);
 430    timer_mod(s->flush_queue_timer,
 431              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
 432}
 433
 434static void
 435set_mdic(E1000State *s, int index, uint32_t val)
 436{
 437    uint32_t data = val & E1000_MDIC_DATA_MASK;
 438    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
 439
 440    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
 441        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
 442    else if (val & E1000_MDIC_OP_READ) {
 443        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
 444        if (!(phy_regcap[addr] & PHY_R)) {
 445            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
 446            val |= E1000_MDIC_ERROR;
 447        } else
 448            val = (val ^ data) | s->phy_reg[addr];
 449    } else if (val & E1000_MDIC_OP_WRITE) {
 450        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
 451        if (!(phy_regcap[addr] & PHY_W)) {
 452            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
 453            val |= E1000_MDIC_ERROR;
 454        } else {
 455            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
 456                phyreg_writeops[addr](s, index, data);
 457            } else {
 458                s->phy_reg[addr] = data;
 459            }
 460        }
 461    }
 462    s->mac_reg[MDIC] = val | E1000_MDIC_READY;
 463
 464    if (val & E1000_MDIC_INT_EN) {
 465        set_ics(s, 0, E1000_ICR_MDAC);
 466    }
 467}
 468
 469static uint32_t
 470get_eecd(E1000State *s, int index)
 471{
 472    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
 473
 474    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
 475           s->eecd_state.bitnum_out, s->eecd_state.reading);
 476    if (!s->eecd_state.reading ||
 477        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
 478          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
 479        ret |= E1000_EECD_DO;
 480    return ret;
 481}
 482
 483static void
 484set_eecd(E1000State *s, int index, uint32_t val)
 485{
 486    uint32_t oldval = s->eecd_state.old_eecd;
 487
 488    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
 489            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
 490    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
 491        return;
 492    }
 493    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
 494        s->eecd_state.val_in = 0;
 495        s->eecd_state.bitnum_in = 0;
 496        s->eecd_state.bitnum_out = 0;
 497        s->eecd_state.reading = 0;
 498    }
 499    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
 500        return;
 501    }
 502    if (!(E1000_EECD_SK & val)) {               /* falling edge */
 503        s->eecd_state.bitnum_out++;
 504        return;
 505    }
 506    s->eecd_state.val_in <<= 1;
 507    if (val & E1000_EECD_DI)
 508        s->eecd_state.val_in |= 1;
 509    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
 510        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
 511        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
 512            EEPROM_READ_OPCODE_MICROWIRE);
 513    }
 514    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
 515           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
 516           s->eecd_state.reading);
 517}
 518
 519static uint32_t
 520flash_eerd_read(E1000State *s, int x)
 521{
 522    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
 523
 524    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
 525        return (s->mac_reg[EERD]);
 526
 527    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
 528        return (E1000_EEPROM_RW_REG_DONE | r);
 529
 530    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
 531           E1000_EEPROM_RW_REG_DONE | r);
 532}
 533
 534static void
 535putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
 536{
 537    uint32_t sum;
 538
 539    if (cse && cse < n)
 540        n = cse + 1;
 541    if (sloc < n-1) {
 542        sum = net_checksum_add(n-css, data+css);
 543        stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
 544    }
 545}
 546
 547static inline void
 548inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
 549{
 550    if (!memcmp(arr, bcast, sizeof bcast)) {
 551        e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
 552    } else if (arr[0] & 1) {
 553        e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
 554    }
 555}
 556
 557static void
 558e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
 559{
 560    static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
 561                                    PTC1023, PTC1522 };
 562
 563    NetClientState *nc = qemu_get_queue(s->nic);
 564    if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
 565        qemu_receive_packet(nc, buf, size);
 566    } else {
 567        qemu_send_packet(nc, buf, size);
 568    }
 569    inc_tx_bcast_or_mcast_count(s, buf);
 570    e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
 571}
 572
     /*
      * Finalise and transmit the frame currently assembled in s->tx.data.
      *
      * For a TSO segment (tp->cptse) the IP and TCP/UDP header fields are
      * patched in place for this segment first. Then the requested checksums
      * are inserted, the VLAN tag is inserted if needed, the frame is sent and
      * the transmit statistics registers are updated.
      */
  573static void
  574xmit_seg(E1000State *s)
  575{
  576    uint16_t len;
  577    unsigned int frames = s->tx.tso_frames, css, sofar;
  578    struct e1000_tx *tp = &s->tx;
         /* TSO segments use the TSE context, everything else the plain one. */
  579    struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
  580
  581    if (tp->cptse) {
  582        css = props->ipcss;
  583        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
  584               frames, tp->size, css);
  585        if (props->ip) {    /* IPv4 */
                 /* 16-bit length field at offset 2; the 16-bit field at offset 4
                  * is advanced by the number of segments already sent. */
  586            stw_be_p(tp->data+css+2, tp->size - css);
  587            stw_be_p(tp->data+css+4,
  588                     lduw_be_p(tp->data + css + 4) + frames);
  589        } else {         /* IPv6 */
                 /* 16-bit length field at offset 4 */
  590            stw_be_p(tp->data+css+4, tp->size - css);
  591        }
  592        css = props->tucss;
             /* Bytes from the start of the TCP/UDP header to the end of the frame. */
  593        len = tp->size - css;
  594        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
  595        if (props->tcp) {
                 /* Payload bytes already sent by the previous segments. */
  596            sofar = frames * props->mss;
  597            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
                 /* More than one MSS left: not the last segment, clear the flags. */
  598            if (props->paylen - sofar > props->mss) {
  599                tp->data[css + 13] &= ~9;    /* PSH, FIN */
  600            } else if (frames) {
                     /* Last segment of a packet that was actually split. */
  601                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
  602            }
  603        } else {    /* UDP */
  604            stw_be_p(tp->data+css+4, len);
  605        }
  606        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
  607            unsigned int phsum;
  608            // add pseudo-header length before checksum calculation
  609            void *sp = tp->data + props->tucso;
  610
  611            phsum = lduw_be_p(sp) + len;
                 /* Fold the carry back into the low 16 bits. */
  612            phsum = (phsum >> 16) + (phsum & 0xffff);
  613            stw_be_p(sp, phsum);
  614        }
  615        tp->tso_frames++;
  616    }
  617
         /* Checksum insertion requested via the descriptor's POPTS bits. */
  618    if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
  619        putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
  620    }
  621    if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
  622        putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
  623    }
  624    if (tp->vlan_needed) {
             /*
              * tp->vlan sits directly in front of tp->data. Move the first 12
              * bytes of the frame 4 bytes towards the front and put the tag in
              * the gap, so the tagged frame starts at tp->vlan.
              */
  625        memmove(tp->vlan, tp->data, 4);
  626        memmove(tp->data, tp->data + 4, 8);
  627        memcpy(tp->data + 8, tp->vlan_header, 4);
  628        e1000_send_packet(s, tp->vlan, tp->size + 4);
  629    } else {
  630        e1000_send_packet(s, tp->data, tp->size);
  631    }
  632
         /* Total packets/octets transmitted; the "good" counters mirror them. */
  633    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
  634    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
  635    s->mac_reg[GPTC] = s->mac_reg[TPT];
  636    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
  637    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
  638}
 639
     /*
      * Consume one transmit descriptor.
      *
      * A context descriptor only updates the stored offload properties. A data
      * or legacy descriptor has its buffer copied from guest memory into
      * s->tx.data; with TSO active, every full segment (header + MSS) is sent
      * through xmit_seg() as soon as it is complete. When the descriptor has EOP
      * set, the remaining frame is sent and the per-packet state is cleared.
      */
  640static void
  641process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
  642{
  643    PCIDevice *d = PCI_DEVICE(s);
  644    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
  645    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
         /* split_size: bytes of this descriptor's buffer still to be copied. */
  646    unsigned int split_size = txd_lower & 0xffff, bytes, sz;
  647    unsigned int msh = 0xfffff;
  648    uint64_t addr;
  649    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
  650    struct e1000_tx *tp = &s->tx;
  651
         /* Accumulate the IDE request; set_interrupt_cause() reads and clears it. */
  652    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
  653    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
             /* TSE set: TSO context; otherwise the plain offload context. */
  654        if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
  655            e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
  656            s->use_tso_for_migration = 1;
  657            tp->tso_frames = 0;
  658        } else {
  659            e1000x_read_tx_ctx_descr(xp, &tp->props);
  660            s->use_tso_for_migration = 0;
  661        }
  662        return;
  663    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
  664        // data descriptor
             /* Offload options are taken from the first data descriptor only. */
  665        if (tp->size == 0) {
  666            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
  667        }
  668        tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
  669    } else {
  670        // legacy descriptor
  671        tp->cptse = 0;
  672    }
  673
         /* Build the VLAN tag (VET + descriptor "special" field) for xmit_seg(). */
  674    if (e1000x_vlan_enabled(s->mac_reg) &&
  675        e1000x_is_vlan_txd(txd_lower) &&
  676        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
  677        tp->vlan_needed = 1;
  678        stw_be_p(tp->vlan_header,
  679                      le16_to_cpu(s->mac_reg[VET]));
  680        stw_be_p(tp->vlan_header + 2,
  681                      le16_to_cpu(dp->upper.fields.special));
  682    }
  683
  684    addr = le64_to_cpu(dp->buffer_addr);
  685    if (tp->cptse) {
             /* msh: size of one full segment, i.e. header length plus MSS. */
  686        msh = tp->tso_props.hdr_len + tp->tso_props.mss;
  687        do {
  688            bytes = split_size;
                 /* Segment buffer already full: stop copying, go to EOP handling. */
  689            if (tp->size >= msh) {
  690                goto eop;
  691            }
                 /* Copy at most what is missing to complete the current segment. */
  692            if (tp->size + bytes > msh)
  693                bytes = msh - tp->size;
  694
                 /* Never write past the end of tp->data. */
  695            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
  696            pci_dma_read(d, addr, tp->data + tp->size, bytes);
  697            sz = tp->size + bytes;
                 /* The header just became complete: keep a copy for later segments. */
  698            if (sz >= tp->tso_props.hdr_len
  699                && tp->size < tp->tso_props.hdr_len) {
  700                memmove(tp->header, tp->data, tp->tso_props.hdr_len);
  701            }
  702            tp->size = sz;
  703            addr += bytes;
                 /* Full segment: send it and restart from the saved header. */
  704            if (sz == msh) {
  705                xmit_seg(s);
  706                memmove(tp->data, tp->header, tp->tso_props.hdr_len);
  707                tp->size = tp->tso_props.hdr_len;
  708            }
  709            split_size -= bytes;
  710        } while (bytes && split_size);
  711    } else {
             /* No segmentation: append the buffer, clamped to the room left. */
  712        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
  713        pci_dma_read(d, addr, tp->data + tp->size, split_size);
  714        tp->size += split_size;
  715    }
  716
  717eop:
         /* Without EOP the packet continues in the next descriptor. */
  718    if (!(txd_lower & E1000_TXD_CMD_EOP))
  719        return;
         /* Send what is left, unless a TSO packet holds less than a full header. */
  720    if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
  721        xmit_seg(s);
  722    }
         /* End of packet: reset the per-packet transmit state. */
  723    tp->tso_frames = 0;
  724    tp->sum_needed = 0;
  725    tp->vlan_needed = 0;
  726    tp->size = 0;
  727    tp->cptse = 0;
  728}
 729
 730static uint32_t
 731txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
 732{
 733    PCIDevice *d = PCI_DEVICE(s);
 734    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
 735
 736    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
 737        return 0;
 738    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
 739                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
 740    dp->upper.data = cpu_to_le32(txd_upper);
 741    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
 742                  &dp->upper, sizeof(dp->upper));
 743    return E1000_ICR_TXDW;
 744}
 745
 746static uint64_t tx_desc_base(E1000State *s)
 747{
 748    uint64_t bah = s->mac_reg[TDBAH];
 749    uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
 750
 751    return (bah << 32) + bal;
 752}
 753
 754static void
 755start_xmit(E1000State *s)
 756{
 757    PCIDevice *d = PCI_DEVICE(s);
 758    dma_addr_t base;
 759    struct e1000_tx_desc desc;
 760    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
 761
 762    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
 763        DBGOUT(TX, "tx disabled\n");
 764        return;
 765    }
 766
 767    if (s->tx.busy) {
 768        return;
 769    }
 770    s->tx.busy = true;
 771
 772    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
 773        base = tx_desc_base(s) +
 774               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
 775        pci_dma_read(d, base, &desc, sizeof(desc));
 776
 777        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
 778               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
 779               desc.upper.data);
 780
 781        process_tx_desc(s, &desc);
 782        cause |= txdesc_writeback(s, base, &desc);
 783
 784        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
 785            s->mac_reg[TDH] = 0;
 786        /*
 787         * the following could happen only if guest sw assigns
 788         * bogus values to TDT/TDLEN.
 789         * there's nothing too intelligent we could do about this.
 790         */
 791        if (s->mac_reg[TDH] == tdh_start ||
 792            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
 793            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
 794                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
 795            break;
 796        }
 797    }
 798    s->tx.busy = false;
 799    set_ics(s, 0, cause);
 800}
 801
 802static int
 803receive_filter(E1000State *s, const uint8_t *buf, int size)
 804{
 805    uint32_t rctl = s->mac_reg[RCTL];
 806    int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
 807
 808    if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
 809        e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
 810        uint16_t vid = lduw_be_p(buf + 14);
 811        uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
 812                                 ((vid >> 5) & 0x7f));
 813        if ((vfta & (1 << (vid & 0x1f))) == 0)
 814            return 0;
 815    }
 816
 817    if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
 818        return 1;
 819    }
 820
 821    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
 822        e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
 823        return 1;
 824    }
 825
 826    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
 827        e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
 828        return 1;
 829    }
 830
 831    return e1000x_rx_group_filter(s->mac_reg, buf);
 832}
 833
 834static void
 835e1000_set_link_status(NetClientState *nc)
 836{
 837    E1000State *s = qemu_get_nic_opaque(nc);
 838    uint32_t old_status = s->mac_reg[STATUS];
 839
 840    if (nc->link_down) {
 841        e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
 842    } else {
 843        if (have_autoneg(s) &&
 844            !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
 845            e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
 846        } else {
 847            e1000_link_up(s);
 848        }
 849    }
 850
 851    if (s->mac_reg[STATUS] != old_status)
 852        set_ics(s, 0, E1000_ICR_LSC);
 853}
 854
 855static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
 856{
 857    int bufs;
 858    /* Fast-path short packets */
 859    if (total_size <= s->rxbuf_size) {
 860        return s->mac_reg[RDH] != s->mac_reg[RDT];
 861    }
 862    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
 863        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
 864    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
 865        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
 866            s->mac_reg[RDT] - s->mac_reg[RDH];
 867    } else {
 868        return false;
 869    }
 870    return total_size <= bufs * s->rxbuf_size;
 871}
 872
 873static bool
 874e1000_can_receive(NetClientState *nc)
 875{
 876    E1000State *s = qemu_get_nic_opaque(nc);
 877
 878    return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
 879        e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
 880}
 881
 882static uint64_t rx_desc_base(E1000State *s)
 883{
 884    uint64_t bah = s->mac_reg[RDBAH];
 885    uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
 886
 887    return (bah << 32) + bal;
 888}
 889
 890static void
 891e1000_receiver_overrun(E1000State *s, size_t size)
 892{
 893    trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
 894    e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
 895    e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
 896    set_ics(s, 0, E1000_ICS_RXO);
 897}
 898
/*
 * .receive_iov callback: deliver one frame, supplied as a scatter/gather
 * list, into the guest's RX descriptor ring.
 *
 * @nc:     net client whose opaque pointer is the E1000State
 * @iov:    scatter/gather list holding the frame
 * @iovcnt: number of entries in @iov
 *
 * Returns:
 *   -1   if the receiver is disabled, or if the ring cannot hold the
 *        frame (an overrun is recorded in that case);
 *    0   while flush_queue_timer is pending;
 *   size otherwise, where size is the frame length after padding and
 *        after VLAN-tag stripping.  This value is also returned for
 *        frames that are dropped as oversized or rejected by the
 *        receive filter.
 *
 * NOTE(review): when VLAN stripping is active and the frame header lives
 * in the caller's first iov element, that buffer is modified in place
 * (memmove below) even though @iov is const-qualified.
 */
static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[MIN_BUF_SIZE];
    struct iovec min_iov;
    /* Contiguous view of the frame header used for filtering. */
    uint8_t *filter_buf = iov->iov_base;
    size_t size = iov_size(iov, iovcnt);
    /* Read offset inside the current iov element. */
    size_t iov_ofs = 0;
    /* Bytes of total_size already assigned to descriptors. */
    size_t desc_offset;
    size_t desc_size;
    /* Frame length plus e1000x_fcs_len(). */
    size_t total_size;

    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
        return -1;
    }

    if (timer_pending(s->flush_queue_timer)) {
        return 0;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        /*
         * Short frame: linearise it into min_buf, zero-fill the rest and
         * continue with a single-element iov that points at min_buf.
         */
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen. */
        /*
         * The first element is too small to hold the header, so gather
         * the header bytes into min_buf for the filter/VLAN checks.
         */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if (e1000x_is_oversized(s->mac_reg, size)) {
        return size;
    }

    /* Frames rejected by the filter are dropped but reported as consumed. */
    if (!receive_filter(s, filter_buf, size)) {
        return size;
    }

    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
        /*
         * Strip the 4-byte VLAN tag: remember the 16-bit value at byte
         * offset 14 for the descriptor's "special" field, move the first
         * 12 header bytes forward by 4 and start copying to the guest at
         * offset 4.
         */
        vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            /*
             * Header was gathered into min_buf: scatter the shifted
             * bytes back into the iov, then skip any leading elements
             * that lie entirely before the new start offset.
             */
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    total_size = size + e1000x_fcs_len(s->mac_reg);
    if (!e1000_has_rxbufs(s, total_size)) {
        e1000_receiver_overrun(s, total_size);
        return -1;
    }
    /* Fill descriptors, one rxbuf_size chunk each, until all is placed. */
    do {
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        /* DD stays clear until the second, status-only write below. */
        desc.status &= ~E1000_RXD_STAT_DD;
        if (desc.buffer_addr) {
            /*
             * Only the first `size` bytes come from the iov; the extra
             * bytes counted in total_size are accounted for in the
             * descriptor length but not written to guest memory.
             */
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                /* Copy across iov element boundaries as needed. */
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                /* Last descriptor of this frame. */
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { /* as per intel docs; skip descriptors with null buf addr */
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        /*
         * Write the descriptor back with DD clear, then rewrite only the
         * status byte with DD (and the VLAN flag) set -- presumably so the
         * guest never sees DD before the rest of the descriptor is valid.
         */
        pci_dma_write(d, base, &desc, sizeof(desc));
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        pci_dma_write(d, base + offsetof(struct e1000_rx_desc, status),
                      &desc.status, sizeof(desc.status));

        /* Advance RDH, wrapping at the end of the ring. */
        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        /*
         * Stop if RDH has come back to where it started or the starting
         * index was outside the ring; this bounds the loop.
         */
        if (s->mac_reg[RDH] == rdh_start ||
            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            e1000_receiver_overrun(s, total_size);
            return -1;
        }
    } while (desc_offset < total_size);

    e1000x_update_rx_total_stats(s->mac_reg, size, total_size);

    /*
     * Always raise RXT0.  Additionally raise RXDMT0 when the free space
     * between RDH and RDT, in descriptor bytes, is at or below
     * RDLEN >> rxbuf_min_shift.
     */
    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
1045
1046static ssize_t
1047e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1048{
1049    const struct iovec iov = {
1050        .iov_base = (uint8_t *)buf,
1051        .iov_len = size
1052    };
1053
1054    return e1000_receive_iov(nc, &iov, 1);
1055}
1056
1057static uint32_t
1058mac_readreg(E1000State *s, int index)
1059{
1060    return s->mac_reg[index];
1061}
1062
1063static uint32_t
1064mac_low4_read(E1000State *s, int index)
1065{
1066    return s->mac_reg[index] & 0xf;
1067}
1068
1069static uint32_t
1070mac_low11_read(E1000State *s, int index)
1071{
1072    return s->mac_reg[index] & 0x7ff;
1073}
1074
1075static uint32_t
1076mac_low13_read(E1000State *s, int index)
1077{
1078    return s->mac_reg[index] & 0x1fff;
1079}
1080
1081static uint32_t
1082mac_low16_read(E1000State *s, int index)
1083{
1084    return s->mac_reg[index] & 0xffff;
1085}
1086
1087static uint32_t
1088mac_icr_read(E1000State *s, int index)
1089{
1090    uint32_t ret = s->mac_reg[ICR];
1091
1092    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1093    set_interrupt_cause(s, 0, 0);
1094    return ret;
1095}
1096
1097static uint32_t
1098mac_read_clr4(E1000State *s, int index)
1099{
1100    uint32_t ret = s->mac_reg[index];
1101
1102    s->mac_reg[index] = 0;
1103    return ret;
1104}
1105
1106static uint32_t
1107mac_read_clr8(E1000State *s, int index)
1108{
1109    uint32_t ret = s->mac_reg[index];
1110
1111    s->mac_reg[index] = 0;
1112    s->mac_reg[index-1] = 0;
1113    return ret;
1114}
1115
1116static void
1117mac_writereg(E1000State *s, int index, uint32_t val)
1118{
1119    uint32_t macaddr[2];
1120
1121    s->mac_reg[index] = val;
1122
1123    if (index == RA + 1) {
1124        macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1125        macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1126        qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1127    }
1128}
1129
1130static void
1131set_rdt(E1000State *s, int index, uint32_t val)
1132{
1133    s->mac_reg[index] = val & 0xffff;
1134    if (e1000_has_rxbufs(s, 1)) {
1135        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1136    }
1137}
1138
1139static void
1140set_16bit(E1000State *s, int index, uint32_t val)
1141{
1142    s->mac_reg[index] = val & 0xffff;
1143}
1144
1145static void
1146set_dlen(E1000State *s, int index, uint32_t val)
1147{
1148    s->mac_reg[index] = val & 0xfff80;
1149}
1150
1151static void
1152set_tctl(E1000State *s, int index, uint32_t val)
1153{
1154    s->mac_reg[index] = val;
1155    s->mac_reg[TDT] &= 0xffff;
1156    start_xmit(s);
1157}
1158
1159static void
1160set_icr(E1000State *s, int index, uint32_t val)
1161{
1162    DBGOUT(INTERRUPT, "set_icr %x\n", val);
1163    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1164}
1165
1166static void
1167set_imc(E1000State *s, int index, uint32_t val)
1168{
1169    s->mac_reg[IMS] &= ~val;
1170    set_ics(s, 0, 0);
1171}
1172
1173static void
1174set_ims(E1000State *s, int index, uint32_t val)
1175{
1176    s->mac_reg[IMS] |= val;
1177    set_ics(s, 0, 0);
1178}
1179
/* Shorthand for a table entry whose read handler is plain mac_readreg. */
#define getreg(x)    [x] = mac_readreg
/* Read handler signature: (device state, register index) -> value. */
typedef uint32_t (*readops)(E1000State *, int);
/*
 * MMIO read dispatch table, indexed by register index (the value
 * e1000_mmio_read() computes as byte offset >> 2).  Entries left NULL
 * have no read handler; e1000_mmio_read() returns 0 for them.
 *
 * The `[a ... b]` range designators are a GNU C extension.
 * NOTE(review): the range entries come last; should a range overlap an
 * earlier single-index entry, the later initializer wins, so keep the
 * order of this table intact.
 */
static const readops macreg_readops[] = {
    /* Registers read back verbatim. */
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),

    /* Reading the high word clears it and the register at index - 1. */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    /* Read-to-clear 32-bit registers. */
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    /* Registers with dedicated read handlers. */
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,
    /* Registers of which only the low 13/11/16 bits are readable. */
    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
    [RDFPC]   = mac_low13_read,
    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
    [TDFPC]   = mac_low13_read,
    [AIT]     = mac_low16_read,

    /* Register ranges (arrays and contiguous blocks). */
    [CRCERRS ... MPC]   = &mac_readreg,
    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
    [FFLT ... FFLT+6]   = &mac_low11_read,
    [RA ... RA+31]      = &mac_readreg,
    [WUPM ... WUPM+31]  = &mac_readreg,
    [MTA ... MTA+127]   = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
    [FFMT ... FFMT+254] = &mac_low4_read,
    [FFVT ... FFVT+254] = &mac_readreg,
    [PBM ... PBM+16383] = &mac_readreg,
};
/* Number of entries in macreg_readops; used to bounds-check the index. */
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1235
/* Shorthand for a table entry whose write handler is plain mac_writereg. */
#define putreg(x)    [x] = mac_writereg
/* Write handler signature: (device state, register index, value). */
typedef void (*writeops)(E1000State *, int, uint32_t);
/*
 * MMIO write dispatch table, indexed by register index (the value
 * e1000_mmio_write() computes as byte offset >> 2).  Entries left NULL
 * have no write handler; e1000_mmio_write() ignores such writes and
 * only logs them.
 *
 * The `[a ... b]` range designators are a GNU C extension.
 * NOTE(review): should a range overlap an earlier single-index entry,
 * the later initializer wins, so keep the order of this table intact.
 */
static const writeops macreg_writeops[] = {
    /* Registers stored verbatim. */
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
    putreg(WUS),      putreg(AIT),

    /* Registers with masking or side effects on write. */
    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
    [ITR]    = set_16bit,

    /* Register ranges (arrays and contiguous blocks). */
    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
    [FFLT ... FFLT+6]   = &mac_writereg,
    [RA ... RA+31]      = &mac_writereg,
    [WUPM ... WUPM+31]  = &mac_writereg,
    [MTA ... MTA+127]   = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
    [PBM ... PBM+16383] = &mac_writereg,
};

/* Number of entries in macreg_writeops; used to bounds-check the index. */
enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1266
/* Low two bits of each mac_reg_access[] entry. */
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

/*
 * Build an access entry that requires compat flag E1000_FLAG_<x>: the
 * flag value is stored above the two low bits and FLAG_NEEDED is set.
 */
#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented */
/*
 * Per-register access attributes, indexed by register index.  The MMIO
 * handlers allow an access when FLAG_NEEDED is clear or when
 * s->compat_flags has the entry's flag bit(s) set; the PARTIAL bit only
 * triggers a debug message.  Registers not listed here have attribute 0
 * and are therefore unrestricted.
 */
static const uint8_t mac_reg_access[0x8000] = {
    /* Registers gated by the MIT compat flag. */
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    /* Registers gated by the MAC compat flag. */
    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    /* Gated by the MAC compat flag and only partially implemented. */
    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1319
1320static void
1321e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1322                 unsigned size)
1323{
1324    E1000State *s = opaque;
1325    unsigned int index = (addr & 0x1ffff) >> 2;
1326
1327    if (index < NWRITEOPS && macreg_writeops[index]) {
1328        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1329            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1330            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1331                DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1332                       "It is not fully implemented.\n", index<<2);
1333            }
1334            macreg_writeops[index](s, index, val);
1335        } else {    /* "flag needed" bit is set, but the flag is not active */
1336            DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1337                   index<<2);
1338        }
1339    } else if (index < NREADOPS && macreg_readops[index]) {
1340        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1341               index<<2, val);
1342    } else {
1343        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1344               index<<2, val);
1345    }
1346}
1347
1348static uint64_t
1349e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1350{
1351    E1000State *s = opaque;
1352    unsigned int index = (addr & 0x1ffff) >> 2;
1353
1354    if (index < NREADOPS && macreg_readops[index]) {
1355        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1356            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1357            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1358                DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1359                       "It is not fully implemented.\n", index<<2);
1360            }
1361            return macreg_readops[index](s, index);
1362        } else {    /* "flag needed" bit is set, but the flag is not active */
1363            DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1364                   index<<2);
1365        }
1366    } else {
1367        DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1368    }
1369    return 0;
1370}
1371
1372static const MemoryRegionOps e1000_mmio_ops = {
1373    .read = e1000_mmio_read,
1374    .write = e1000_mmio_write,
1375    .endianness = DEVICE_LITTLE_ENDIAN,
1376    .impl = {
1377        .min_access_size = 4,
1378        .max_access_size = 4,
1379    },
1380};
1381
1382static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1383                              unsigned size)
1384{
1385    E1000State *s = opaque;
1386
1387    (void)s;
1388    return 0;
1389}
1390
1391static void e1000_io_write(void *opaque, hwaddr addr,
1392                           uint64_t val, unsigned size)
1393{
1394    E1000State *s = opaque;
1395
1396    (void)s;
1397}
1398
1399static const MemoryRegionOps e1000_io_ops = {
1400    .read = e1000_io_read,
1401    .write = e1000_io_write,
1402    .endianness = DEVICE_LITTLE_ENDIAN,
1403};
1404
/*
 * Field test used by vmstate_e1000: true only for version 1 of the
 * migration stream.  @opaque is unused.
 */
static bool is_version_1(void *opaque, int version_id)
{
    if (version_id != 1) {
        return false;
    }
    return true;
}
1409
1410static int e1000_pre_save(void *opaque)
1411{
1412    E1000State *s = opaque;
1413    NetClientState *nc = qemu_get_queue(s->nic);
1414
1415    /*
1416     * If link is down and auto-negotiation is supported and ongoing,
1417     * complete auto-negotiation immediately. This allows us to look
1418     * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1419     */
1420    if (nc->link_down && have_autoneg(s)) {
1421        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1422    }
1423
1424    /* Decide which set of props to migrate in the main structure */
1425    if (chkflag(TSO) || !s->use_tso_for_migration) {
1426        /* Either we're migrating with the extra subsection, in which
1427         * case the mig_props is always 'props' OR
1428         * we've not got the subsection, but 'props' was the last
1429         * updated.
1430         */
1431        s->mig_props = s->tx.props;
1432    } else {
1433        /* We're not using the subsection, and 'tso_props' was
1434         * the last updated.
1435         */
1436        s->mig_props = s->tx.tso_props;
1437    }
1438    return 0;
1439}
1440
1441static int e1000_post_load(void *opaque, int version_id)
1442{
1443    E1000State *s = opaque;
1444    NetClientState *nc = qemu_get_queue(s->nic);
1445
1446    if (!chkflag(MIT)) {
1447        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1448            s->mac_reg[TADV] = 0;
1449        s->mit_irq_level = false;
1450    }
1451    s->mit_ide = 0;
1452    s->mit_timer_on = true;
1453    timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1454
1455    /* nc.link_down can't be migrated, so infer link_down according
1456     * to link status bit in mac_reg[STATUS].
1457     * Alternatively, restart link negotiation if it was in progress. */
1458    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1459
1460    if (have_autoneg(s) &&
1461        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1462        nc->link_down = false;
1463        timer_mod(s->autoneg_timer,
1464                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1465    }
1466
1467    s->tx.props = s->mig_props;
1468    if (!s->received_tx_tso) {
1469        /* We received only one set of offload data (tx.props)
1470         * and haven't got tx.tso_props.  The best we can do
1471         * is dupe the data.
1472         */
1473        s->tx.tso_props = s->mig_props;
1474    }
1475    return 0;
1476}
1477
1478static int e1000_tx_tso_post_load(void *opaque, int version_id)
1479{
1480    E1000State *s = opaque;
1481    s->received_tx_tso = true;
1482    return 0;
1483}
1484
1485static bool e1000_mit_state_needed(void *opaque)
1486{
1487    E1000State *s = opaque;
1488
1489    return chkflag(MIT);
1490}
1491
1492static bool e1000_full_mac_needed(void *opaque)
1493{
1494    E1000State *s = opaque;
1495
1496    return chkflag(MAC);
1497}
1498
1499static bool e1000_tso_state_needed(void *opaque)
1500{
1501    E1000State *s = opaque;
1502
1503    return chkflag(TSO);
1504}
1505
/*
 * Subsection carrying the interrupt-mitigation registers and the
 * mit_irq_level flag.  Its .needed hook (e1000_mit_state_needed) is
 * true only when the MIT compat flag is set.
 * NOTE(review): field order is presumably part of the migration stream
 * layout -- do not reorder without checking compatibility.
 */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1520
/*
 * Subsection carrying the complete mac_reg[] array (0x8000 entries).
 * Its .needed hook (e1000_full_mac_needed) is true only when the MAC
 * compat flag is set.
 */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1531
/*
 * Subsection carrying tx.tso_props.  Its .needed hook
 * (e1000_tso_state_needed) is true only when the TSO compat flag is
 * set; on load, e1000_tx_tso_post_load() records that it was received.
 * NOTE(review): field order is presumably part of the migration stream
 * layout -- do not reorder without checking compatibility.
 */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1553
/*
 * Main migration description of the device (version 2, accepting
 * streams from version 1 onwards).  e1000_pre_save() and
 * e1000_post_load() run around save and load respectively.
 * NOTE(review): field order is presumably part of the migration stream
 * layout -- do not reorder or insert fields without checking
 * compatibility.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM access state */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* Offload properties chosen by e1000_pre_save(), plus TX state */
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        /* EEPROM contents and PHY registers */
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individually migrated MAC registers */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Register arrays: receive addresses, multicast and VLAN tables */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    /* Optional subsections, each guarded by its own .needed hook. */
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1638
/*
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 * Note: A valid DevId will be inserted during pci_e1000_realize().
 *
 * This 64-word template is handed to e1000x_core_prepare_eeprom() in
 * pci_e1000_realize() together with the PCI device id and the MAC
 * address to produce the per-device eeprom_data.
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1653
1654/* PCI interface */
1655
1656static void
1657e1000_mmio_setup(E1000State *d)
1658{
1659    int i;
1660    const uint32_t excluded_regs[] = {
1661        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1662        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1663    };
1664
1665    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1666                          "e1000-mmio", PNPMMIO_SIZE);
1667    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1668    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1669        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1670                                     excluded_regs[i+1] - excluded_regs[i] - 4);
1671    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1672}
1673
1674static void
1675pci_e1000_uninit(PCIDevice *dev)
1676{
1677    E1000State *d = E1000(dev);
1678
1679    timer_free(d->autoneg_timer);
1680    timer_free(d->mit_timer);
1681    timer_free(d->flush_queue_timer);
1682    qemu_del_nic(d->nic);
1683}
1684
1685static NetClientInfo net_e1000_info = {
1686    .type = NET_CLIENT_DRIVER_NIC,
1687    .size = sizeof(NICState),
1688    .can_receive = e1000_can_receive,
1689    .receive = e1000_receive,
1690    .receive_iov = e1000_receive_iov,
1691    .link_status_changed = e1000_set_link_status,
1692};
1693
1694static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1695                                uint32_t val, int len)
1696{
1697    E1000State *s = E1000(pci_dev);
1698
1699    pci_default_write_config(pci_dev, address, val, len);
1700
1701    if (range_covers_byte(address, len, PCI_COMMAND) &&
1702        (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1703        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1704    }
1705}
1706
1707static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1708{
1709    DeviceState *dev = DEVICE(pci_dev);
1710    E1000State *d = E1000(pci_dev);
1711    uint8_t *pci_conf;
1712    uint8_t *macaddr;
1713
1714    pci_dev->config_write = e1000_write_config;
1715
1716    pci_conf = pci_dev->config;
1717
1718    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1719    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1720
1721    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1722
1723    e1000_mmio_setup(d);
1724
1725    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1726
1727    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1728
1729    qemu_macaddr_default_if_unset(&d->conf.macaddr);
1730    macaddr = d->conf.macaddr.a;
1731
1732    e1000x_core_prepare_eeprom(d->eeprom_data,
1733                               e1000_eeprom_template,
1734                               sizeof(e1000_eeprom_template),
1735                               PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1736                               macaddr);
1737
1738    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1739                          object_get_typename(OBJECT(d)), dev->id, d);
1740
1741    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1742
1743    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1744    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1745    d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1746                                        e1000_flush_queue_timer, d);
1747}
1748
1749static void qdev_e1000_reset(DeviceState *dev)
1750{
1751    E1000State *d = E1000(dev);
1752    e1000_reset(d);
1753}
1754
/*
 * Device properties, attached to the class by e1000_class_init().
 *
 * Besides the generic NIC properties for the conf field, each entry below
 * exposes one bit of E1000State.compat_flags under a property name; every
 * entry passes true as its last macro argument.
 */
static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
                    compat_flags, E1000_FLAG_MAC_BIT, true),
    DEFINE_PROP_BIT("migrate_tso_props", E1000State,
                    compat_flags, E1000_FLAG_TSO_BIT, true),
    DEFINE_PROP_BIT("init-vet", E1000State,
                    compat_flags, E1000_FLAG_VET_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
1769
/*
 * Per-model parameters.  One instance is passed as class_data to
 * e1000_class_init() for each entry of e1000_devices[].
 */
typedef struct E1000Info {
    const char *name;       /* QOM type name used when registering the model */
    uint16_t   device_id;   /* copied into PCIDeviceClass.device_id */
    uint8_t    revision;    /* copied into PCIDeviceClass.revision */
    uint16_t   phy_id2;     /* copied into E1000BaseClass.phy_id2 */
} E1000Info;
1776
1777static void e1000_class_init(ObjectClass *klass, void *data)
1778{
1779    DeviceClass *dc = DEVICE_CLASS(klass);
1780    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1781    E1000BaseClass *e = E1000_CLASS(klass);
1782    const E1000Info *info = data;
1783
1784    k->realize = pci_e1000_realize;
1785    k->exit = pci_e1000_uninit;
1786    k->romfile = "efi-e1000.rom";
1787    k->vendor_id = PCI_VENDOR_ID_INTEL;
1788    k->device_id = info->device_id;
1789    k->revision = info->revision;
1790    e->phy_id2 = info->phy_id2;
1791    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1792    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1793    dc->desc = "Intel Gigabit Ethernet";
1794    dc->reset = qdev_e1000_reset;
1795    dc->vmsd = &vmstate_e1000;
1796    device_class_set_props(dc, e1000_properties);
1797}
1798
1799static void e1000_instance_init(Object *obj)
1800{
1801    E1000State *n = E1000(obj);
1802    device_add_bootindex_property(obj, &n->conf.bootindex,
1803                                  "bootindex", "/ethernet-phy@0",
1804                                  DEVICE(n));
1805}
1806
1807static const TypeInfo e1000_base_info = {
1808    .name          = TYPE_E1000_BASE,
1809    .parent        = TYPE_PCI_DEVICE,
1810    .instance_size = sizeof(E1000State),
1811    .instance_init = e1000_instance_init,
1812    .class_size    = sizeof(E1000BaseClass),
1813    .abstract      = true,
1814    .interfaces = (InterfaceInfo[]) {
1815        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1816        { },
1817    },
1818};
1819
/*
 * Concrete device models.  e1000_register_types() registers one QOM type
 * per entry, in array order, and e1000_class_init() copies the ID fields
 * into the resulting class.
 */
static const E1000Info e1000_devices[] = {
    {
        .name      = "e1000",
        .device_id = E1000_DEV_ID_82540EM,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
    {
        .name      = "e1000-82544gc",
        .device_id = E1000_DEV_ID_82544GC_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_82544x,
    },
    {
        .name      = "e1000-82545em",
        .device_id = E1000_DEV_ID_82545EM_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
};
1840
1841static void e1000_register_types(void)
1842{
1843    int i;
1844
1845    type_register_static(&e1000_base_info);
1846    for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1847        const E1000Info *info = &e1000_devices[i];
1848        TypeInfo type_info = {};
1849
1850        type_info.name = info->name;
1851        type_info.parent = TYPE_E1000_BASE;
1852        type_info.class_data = (void *)info;
1853        type_info.class_init = e1000_class_init;
1854
1855        type_register(&type_info);
1856    }
1857}
1858
1859type_init(e1000_register_types)
1860