/* qemu/hw/net/e1000.c */
   1/*
   2 * QEMU e1000 emulation
   3 *
   4 * Software developer's manual:
   5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
   6 *
   7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
   8 * Copyright (c) 2008 Qumranet
   9 * Based on work done by:
  10 * Copyright (c) 2007 Dan Aloni
  11 * Copyright (c) 2004 Antony T Curtis
  12 *
  13 * This library is free software; you can redistribute it and/or
  14 * modify it under the terms of the GNU Lesser General Public
  15 * License as published by the Free Software Foundation; either
  16 * version 2 of the License, or (at your option) any later version.
  17 *
  18 * This library is distributed in the hope that it will be useful,
  19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 * Lesser General Public License for more details.
  22 *
  23 * You should have received a copy of the GNU Lesser General Public
  24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  25 */
  26
  27
  28#include "qemu/osdep.h"
  29#include "hw/hw.h"
  30#include "hw/pci/pci.h"
  31#include "net/net.h"
  32#include "net/checksum.h"
  33#include "sysemu/sysemu.h"
  34#include "sysemu/dma.h"
  35#include "qemu/iov.h"
  36#include "qemu/range.h"
  37
  38#include "e1000x_common.h"
  39
/* Ethernet broadcast destination address (ff:ff:ff:ff:ff:ff). */
static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
  41
  42/* #define E1000_DEBUG */
  43
  44#ifdef E1000_DEBUG
  45enum {
  46    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
  47    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
  48    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
  49    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
  50};
  51#define DBGBIT(x)    (1<<DEBUG_##x)
  52static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
  53
  54#define DBGOUT(what, fmt, ...) do { \
  55    if (debugflags & DBGBIT(what)) \
  56        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
  57    } while (0)
  58#else
  59#define DBGOUT(what, fmt, ...) do {} while (0)
  60#endif
  61
  62#define IOPORT_SIZE       0x40
  63#define PNPMMIO_SIZE      0x20000
  64#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
  65
  66#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
  67
  68/*
  69 * HW models:
  70 *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
  71 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
  72 *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
  73 *  Others never tested
  74 */
  75
/* Device state for the emulated Intel 8254x (e1000) NIC. */
typedef struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;
    MemoryRegion io;

    uint32_t mac_reg[0x8000];   /* MAC registers, indexed by (offset >> 2) */
    uint16_t phy_reg[0x20];     /* PHY (MII) registers */
    uint16_t eeprom_data[64];   /* emulated EEPROM contents, 16-bit words */

    uint32_t rxbuf_size;        /* RX buffer size derived from RCTL */
    uint32_t rxbuf_min_shift;   /* RX descriptor minimum-threshold shift */
    /* Transmit packet assembly state (see process_tx_desc()/xmit_seg()). */
    struct e1000_tx {
        unsigned char header[256];     /* saved TSO protocol header */
        unsigned char vlan_header[4];  /* VLAN tag to reinsert on send */
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;                 /* bytes accumulated in data[] */
        unsigned char vlan_needed;     /* reinsert VLAN tag on transmit */
        unsigned char sum_needed;      /* TXSM/IXSM checksum-offload flags */
        bool cptse;                    /* current packet uses TSO */
        e1000x_txd_props props;        /* legacy/checksum offload context */
        e1000x_txd_props tso_props;    /* TSO offload context */
        uint16_t tso_frames;           /* TSO segments emitted so far */
    } tx;

    /* Microwire EEPROM bit-bang state (see set_eecd()/get_eecd()). */
    struct {
        uint32_t val_in;    /* shifted in from guest driver */
        uint16_t bitnum_in;
        uint16_t bitnum_out;
        uint16_t reading;
        uint32_t old_eecd;
    } eecd_state;

    QEMUTimer *autoneg_timer;

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_TSO_BIT 3
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
    uint32_t compat_flags;
    bool received_tx_tso;
    bool use_tso_for_migration;
    e1000x_txd_props mig_props;   /* offload context saved for migration */
} E1000State;
 136
 137#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
 138
/* Class data shared by all e1000 device variants. */
typedef struct E1000BaseClass {
    PCIDeviceClass parent_class;
    uint16_t phy_id2;   /* PHY_ID2 register value for this device model */
} E1000BaseClass;
 143
 144#define TYPE_E1000_BASE "e1000-base"
 145
 146#define E1000(obj) \
 147    OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)
 148
 149#define E1000_DEVICE_CLASS(klass) \
 150     OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
 151#define E1000_DEVICE_GET_CLASS(obj) \
 152    OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
 153
 154static void
 155e1000_link_up(E1000State *s)
 156{
 157    e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
 158
 159    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 160    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 161}
 162
 163static void
 164e1000_autoneg_done(E1000State *s)
 165{
 166    e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
 167
 168    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 169    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 170}
 171
 172static bool
 173have_autoneg(E1000State *s)
 174{
 175    return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
 176}
 177
 178static void
 179set_phy_ctrl(E1000State *s, int index, uint16_t val)
 180{
 181    /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
 182    s->phy_reg[PHY_CTRL] = val & ~(0x3f |
 183                                   MII_CR_RESET |
 184                                   MII_CR_RESTART_AUTO_NEG);
 185
 186    /*
 187     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
 188     * migrate during auto negotiation, after migration the link will be
 189     * down.
 190     */
 191    if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
 192        e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
 193    }
 194}
 195
 196static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
 197    [PHY_CTRL] = set_phy_ctrl,
 198};
 199
 200enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
 201
/* Guest access rights for each PHY register: readable, writable, or both.
 * Registers not listed here are inaccessible (set_mdic() reports an error). */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
    [PHY_AUTONEG_EXP] = PHY_R,
};
 212
/* Power-on default values for the emulated PHY registers.
 * PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
static const uint16_t phy_reg_init[] = {
    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
                   MII_CR_FULL_DUPLEX |
                   MII_CR_AUTO_NEG_EN,

    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
                   MII_SR_LINK_STATUS |   /* link initially up */
                   MII_SR_AUTONEG_CAPS |
                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
                   MII_SR_PREAMBLE_SUPPRESS |
                   MII_SR_EXTENDED_STATUS |
                   MII_SR_10T_HD_CAPS |
                   MII_SR_10T_FD_CAPS |
                   MII_SR_100X_HD_CAPS |
                   MII_SR_100X_FD_CAPS,

    [PHY_ID1] = 0x141,
    /* [PHY_ID2] configured per DevId, from e1000_reset() */
    [PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0,
    [PHY_1000T_CTRL] = 0x0e00,
    [PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};
 240
/* Power-on default values for the MAC registers; everything not listed
 * here starts as zero (see e1000_reset()). */
static const uint32_t mac_reg_init[] = {
    [PBA]     = 0x00100030,
    [LEDCTL]  = 0x602,
    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
 254
 255/* Helper function, *curr == 0 means the value is not set */
 256static inline void
 257mit_update_delay(uint32_t *curr, uint32_t value)
 258{
 259    if (value && (*curr == 0 || value < *curr)) {
 260        *curr = value;
 261    }
 262}
 263
/*
 * Store 'val' into ICR (mirrored into ICS) and recompute the INTA#
 * line level from ICR & IMS.  When a rising edge is detected and
 * interrupt mitigation is enabled, raising the line is deferred until
 * the mitigation timer fires (see e1000_mit_timer()).
 * 'index' is unused; the signature matches the register write-op table.
 */
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t pending_ints;
    uint32_t mit_delay;

    s->mac_reg[ICR] = val;

    /*
     * Make sure ICR and ICS registers have the same value.
     * The spec says that the ICS register is write-only.  However in practice,
     * on real hardware ICS is readable, and for reads it has the same value as
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
     *
     * The VxWorks PRO/1000 driver uses this behaviour.
     */
    s->mac_reg[ICS] = val;

    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    if (!s->mit_irq_level && pending_ints) {
        /*
         * Here we detect a potential raising edge. We postpone raising the
         * interrupt line if we are inside the mitigation delay window
         * (s->mit_timer_on == 1).
         * We provide a partial implementation of interrupt mitigation,
         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
         * RADV; relative timers based on TIDV and RDTR are not implemented.
         */
        if (s->mit_timer_on) {
            return;
        }
        if (chkflag(MIT)) {
            /* Compute the next mitigation delay according to pending
             * interrupts and the current values of RADV (provided
             * RDTR!=0), TADV and ITR.
             * Then rearm the timer.
             */
            mit_delay = 0;
            if (s->mit_ide &&
                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
            }
            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
            }
            mit_update_delay(&mit_delay, s->mac_reg[ITR]);

            /*
             * According to e1000 SPEC, the Ethernet controller guarantees
             * a maximum observable interrupt rate of 7813 interrupts/sec.
             * Thus if mit_delay < 500 then the delay should be set to the
             * minimum delay possible which is 500.
             */
            mit_delay = (mit_delay < 500) ? 500 : mit_delay;

            s->mit_timer_on = 1;
            /* mit_delay is in 256ns units (ITR granularity). */
            timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                      mit_delay * 256);
            s->mit_ide = 0;
        }
    }

    /* Drive the pin to match the (possibly unchanged) pending state. */
    s->mit_irq_level = (pending_ints != 0);
    pci_set_irq(d, s->mit_irq_level);
}
 331
 332static void
 333e1000_mit_timer(void *opaque)
 334{
 335    E1000State *s = opaque;
 336
 337    s->mit_timer_on = 0;
 338    /* Call set_interrupt_cause to update the irq level (if necessary). */
 339    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
 340}
 341
 342static void
 343set_ics(E1000State *s, int index, uint32_t val)
 344{
 345    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
 346        s->mac_reg[IMS]);
 347    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
 348}
 349
 350static void
 351e1000_autoneg_timer(void *opaque)
 352{
 353    E1000State *s = opaque;
 354    if (!qemu_get_queue(s->nic)->link_down) {
 355        e1000_autoneg_done(s);
 356        set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
 357    }
 358}
 359
 360static void e1000_reset(void *opaque)
 361{
 362    E1000State *d = opaque;
 363    E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
 364    uint8_t *macaddr = d->conf.macaddr.a;
 365
 366    timer_del(d->autoneg_timer);
 367    timer_del(d->mit_timer);
 368    d->mit_timer_on = 0;
 369    d->mit_irq_level = 0;
 370    d->mit_ide = 0;
 371    memset(d->phy_reg, 0, sizeof d->phy_reg);
 372    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
 373    d->phy_reg[PHY_ID2] = edc->phy_id2;
 374    memset(d->mac_reg, 0, sizeof d->mac_reg);
 375    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
 376    d->rxbuf_min_shift = 1;
 377    memset(&d->tx, 0, sizeof d->tx);
 378
 379    if (qemu_get_queue(d->nic)->link_down) {
 380        e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
 381    }
 382
 383    e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
 384}
 385
 386static void
 387set_ctrl(E1000State *s, int index, uint32_t val)
 388{
 389    /* RST is self clearing */
 390    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
 391}
 392
 393static void
 394set_rx_control(E1000State *s, int index, uint32_t val)
 395{
 396    s->mac_reg[RCTL] = val;
 397    s->rxbuf_size = e1000x_rxbufsize(val);
 398    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
 399    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
 400           s->mac_reg[RCTL]);
 401    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 402}
 403
/*
 * Write handler for the MDI Control register (MDIC): performs one MDIO
 * read or write transaction against the emulated PHY, latches the
 * result with the READY bit set, and optionally raises the MDAC
 * interrupt.  'addr' is 5 bits wide, so it always indexes within
 * phy_regcap[0x20].
 */
static void
set_mdic(E1000State *s, int index, uint32_t val)
{
    uint32_t data = val & E1000_MDIC_DATA_MASK;
    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);

    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // only PHY address 1 is emulated
        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
    else if (val & E1000_MDIC_OP_READ) {
        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
        if (!(phy_regcap[addr] & PHY_R)) {
            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else
            /* Clear the data field and merge in the register value. */
            val = (val ^ data) | s->phy_reg[addr];
    } else if (val & E1000_MDIC_OP_WRITE) {
        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
        if (!(phy_regcap[addr] & PHY_W)) {
            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else {
            /* Registers with side effects go through their handler. */
            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
                phyreg_writeops[addr](s, index, data);
            } else {
                s->phy_reg[addr] = data;
            }
        }
    }
    s->mac_reg[MDIC] = val | E1000_MDIC_READY;

    if (val & E1000_MDIC_INT_EN) {
        set_ics(s, 0, E1000_ICR_MDAC);
    }
}
 438
/*
 * Read handler for the EEPROM/flash control register (EECD).
 * Returns the latched control bits plus the serial data-out (DO) bit.
 * During a microwire read (set up in set_eecd()), DO carries bit
 * (15 - bitnum_out % 16) of eeprom_data[bitnum_out / 16], MSB first;
 * when no read is in progress DO idles high.
 */
static uint32_t
get_eecd(E1000State *s, int index)
{
    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;

    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
           s->eecd_state.bitnum_out, s->eecd_state.reading);
    /* Word index = bitnum_out >> 4; bit within the word is selected
     * MSB-first via the (bitnum & 0xf) ^ 0xf shift. */
    if (!s->eecd_state.reading ||
        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
        ret |= E1000_EECD_DO;
    return ret;
}
 452
/*
 * Write handler for EECD: emulates the bit-banged microwire EEPROM
 * protocol.  The guest toggles SK (clock) with CS asserted, shifting
 * command bits in through DI on rising edges.  After 9 bits (start bit
 * + opcode + 6-bit address) of a READ command, get_eecd() starts
 * shifting the addressed 16-bit word out through DO.
 */
static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    /* Latch the writable control bits for subsequent EECD reads. */
    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
        return;
    }
    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
        s->eecd_state.val_in = 0;
        s->eecd_state.bitnum_in = 0;
        s->eecd_state.bitnum_out = 0;
        s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
        return;
    }
    if (!(E1000_EECD_SK & val)) {               /* falling edge */
        s->eecd_state.bitnum_out++;
        return;
    }
    /* Rising clock edge: shift one command bit in from DI. */
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        /* Full command received: position the output bit counter so the
         * addressed word streams out next, and check for READ opcode. */
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}
 488
 489static uint32_t
 490flash_eerd_read(E1000State *s, int x)
 491{
 492    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
 493
 494    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
 495        return (s->mac_reg[EERD]);
 496
 497    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
 498        return (E1000_EEPROM_RW_REG_DONE | r);
 499
 500    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
 501           E1000_EEPROM_RW_REG_DONE | r);
 502}
 503
/*
 * Compute an Internet checksum and store it into the frame buffer.
 *
 * data, n: frame and its length in bytes.
 * sloc:    offset at which to store the 16-bit checksum (big-endian).
 * css:     offset where checksumming starts.
 * cse:     offset of the last byte to include; 0 means "to end of packet".
 *
 * NOTE(review): 'n - 1' is unsigned, so n == 0 would wrap to 0xffffffff
 * and pass the bounds check — presumably callers never pass an empty
 * frame; verify against xmit_seg().
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;

    if (cse && cse < n)
        n = cse + 1;
    if (sloc < n-1) {
        sum = net_checksum_add(n-css, data+css);
        stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
    }
}
 516
 517static inline void
 518inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
 519{
 520    if (!memcmp(arr, bcast, sizeof bcast)) {
 521        e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
 522    } else if (arr[0] & 1) {
 523        e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
 524    }
 525}
 526
 527static void
 528e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
 529{
 530    static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
 531                                    PTC1023, PTC1522 };
 532
 533    NetClientState *nc = qemu_get_queue(s->nic);
 534    if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
 535        nc->info->receive(nc, buf, size);
 536    } else {
 537        qemu_send_packet(nc, buf, size);
 538    }
 539    inc_tx_bcast_or_mcast_count(s, buf);
 540    e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
 541}
 542
/*
 * Transmit one segment of the packet assembled in s->tx.
 * For a TSO packet (tp->cptse) this first patches the per-segment IP
 * total-length/identification and TCP sequence / UDP length fields and
 * adds the segment length into the TCP pseudo-header checksum.  It then
 * inserts any checksums requested via sum_needed, reinserts the VLAN
 * tag if one was stripped, sends the frame, and updates TX statistics.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len;
    unsigned int frames = s->tx.tso_frames, css, sofar;
    struct e1000_tx *tp = &s->tx;
    struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;

    if (tp->cptse) {
        css = props->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (props->ip) {    /* IPv4: patch total length and IP id */
            stw_be_p(tp->data+css+2, tp->size - css);
            stw_be_p(tp->data+css+4,
                     lduw_be_p(tp->data + css + 4) + frames);
        } else {         /* IPv6: patch payload length */
            stw_be_p(tp->data+css+4, tp->size - css);
        }
        css = props->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
        if (props->tcp) {
            sofar = frames * props->mss;
            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
            if (props->paylen - sofar > props->mss) {
                /* Not the last segment: clear PSH and FIN flags. */
                tp->data[css + 13] &= ~9;    /* PSH, FIN */
            } else if (frames) {
                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
            }
        } else {    /* UDP */
            stw_be_p(tp->data+css+4, len);
        }
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            void *sp = tp->data + props->tucso;

            phsum = lduw_be_p(sp) + len;
            phsum = (phsum >> 16) + (phsum & 0xffff);   /* fold carry */
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
        putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
    }
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
        putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
    }
    if (tp->vlan_needed) {
        /* Reinsert the VLAN tag: dst/src MAC move into tp->vlan (which
         * directly precedes tp->data in the struct), tag goes after. */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else {
        e1000_send_packet(s, tp->data, tp->size);
    }

    /* Update transmit statistics; G* registers mirror the totals. */
    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
    s->mac_reg[GPTC] = s->mac_reg[TPT];
    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
}
 609
/*
 * Process one transmit descriptor.
 * A context descriptor updates the offload parameters (TSO or legacy
 * checksum context) and returns.  A data or legacy descriptor appends
 * its buffer to the packet being assembled in s->tx; for TSO packets,
 * an intermediate segment is emitted each time header + mss bytes have
 * accumulated.  On EOP the final segment is sent and the per-packet
 * assembly state is reset.
 */
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    unsigned int split_size = txd_lower & 0xffff, bytes, sz;
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
        if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
            e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
            s->use_tso_for_migration = 1;
            tp->tso_frames = 0;
        } else {
            e1000x_read_tx_ctx_descr(xp, &tp->props);
            s->use_tso_for_migration = 0;
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        // data descriptor
        if (tp->size == 0) {
            /* First descriptor of the packet carries the POPTS flags. */
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
    } else {
        // legacy descriptor
        tp->cptse = 0;
    }

    /* Strip the VLAN tag now; it is reinserted on send by xmit_seg(). */
    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        tp->vlan_needed = 1;
        stw_be_p(tp->vlan_header,
                      le16_to_cpu(s->mac_reg[VET]));
        stw_be_p(tp->vlan_header + 2,
                      le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->cptse) {
        /* TSO: copy in at most header + mss bytes at a time, emitting a
         * segment whenever a full segment's worth has accumulated. */
        msh = tp->tso_props.hdr_len + tp->tso_props.mss;
        do {
            bytes = split_size;
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            /* Clamp to the assembly buffer to avoid overflow. */
            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            if (sz >= tp->tso_props.hdr_len
                && tp->size < tp->tso_props.hdr_len) {
                /* The protocol header just completed; save a copy so it
                 * can be replayed at the start of each later segment. */
                memmove(tp->header, tp->data, tp->tso_props.hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                xmit_seg(s);
                memmove(tp->data, tp->header, tp->tso_props.hdr_len);
                tp->size = tp->tso_props.hdr_len;
            }
            split_size -= bytes;
        } while (bytes && split_size);
    } else {
        /* Non-TSO: append the whole buffer (clamped) in one DMA read. */
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    /* End of packet: send the final segment unless a TSO packet ended
     * before its header was even complete. */
    if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
        xmit_seg(s);
    }
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}
 695
 696static uint32_t
 697txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
 698{
 699    PCIDevice *d = PCI_DEVICE(s);
 700    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
 701
 702    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
 703        return 0;
 704    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
 705                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
 706    dp->upper.data = cpu_to_le32(txd_upper);
 707    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
 708                  &dp->upper, sizeof(dp->upper));
 709    return E1000_ICR_TXDW;
 710}
 711
 712static uint64_t tx_desc_base(E1000State *s)
 713{
 714    uint64_t bah = s->mac_reg[TDBAH];
 715    uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
 716
 717    return (bah << 32) + bal;
 718}
 719
/*
 * Drain the transmit ring: process descriptors from TDH up to TDT,
 * writing back completion status and accumulating interrupt causes,
 * which are raised once at the end via set_ics().  The tdh_start check
 * prevents an unbounded loop when the guest programs bogus TDT/TDLEN
 * values.
 */
static void
start_xmit(E1000State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(s, base, &desc);

        /* Advance TDH, wrapping at the end of the ring. */
        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start ||
            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    set_ics(s, 0, cause);
}
 761
/*
 * Decide whether a received frame should be accepted; returns nonzero
 * to accept.  Checks, in order: the VLAN filter table (when VLAN RX
 * filtering is enabled), the unicast/multicast promiscuous bits,
 * broadcast acceptance, and finally the exact-match address table via
 * e1000x_rx_group_filter().  'size' is currently unused here.
 */
static int
receive_filter(E1000State *s, const uint8_t *buf, int size)
{
    uint32_t rctl = s->mac_reg[RCTL];
    int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);

    if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
        e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
        /* VLAN id lives right after the two MAC addresses + ethertype. */
        uint16_t vid = lduw_be_p(buf + 14);
        uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
                                 ((vid >> 5) & 0x7f));
        if ((vfta & (1 << (vid & 0x1f))) == 0)
            return 0;
    }

    if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
        return 1;
    }

    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
        e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
        return 1;
    }

    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
        e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
        return 1;
    }

    return e1000x_rx_group_filter(s->mac_reg, buf);
}
 793
 794static void
 795e1000_set_link_status(NetClientState *nc)
 796{
 797    E1000State *s = qemu_get_nic_opaque(nc);
 798    uint32_t old_status = s->mac_reg[STATUS];
 799
 800    if (nc->link_down) {
 801        e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
 802    } else {
 803        if (have_autoneg(s) &&
 804            !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
 805            e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
 806        } else {
 807            e1000_link_up(s);
 808        }
 809    }
 810
 811    if (s->mac_reg[STATUS] != old_status)
 812        set_ics(s, 0, E1000_ICR_LSC);
 813}
 814
 815static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
 816{
 817    int bufs;
 818    /* Fast-path short packets */
 819    if (total_size <= s->rxbuf_size) {
 820        return s->mac_reg[RDH] != s->mac_reg[RDT];
 821    }
 822    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
 823        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
 824    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
 825        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
 826            s->mac_reg[RDT] - s->mac_reg[RDH];
 827    } else {
 828        return false;
 829    }
 830    return total_size <= bufs * s->rxbuf_size;
 831}
 832
 833static int
 834e1000_can_receive(NetClientState *nc)
 835{
 836    E1000State *s = qemu_get_nic_opaque(nc);
 837
 838    return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
 839        e1000_has_rxbufs(s, 1);
 840}
 841
 842static uint64_t rx_desc_base(E1000State *s)
 843{
 844    uint64_t bah = s->mac_reg[RDBAH];
 845    uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
 846
 847    return (bah << 32) + bal;
 848}
 849
/*
 * Receive one frame (as a scatter/gather list) into guest memory via
 * the RX descriptor ring and raise the matching interrupt causes.
 *
 * Returns the frame size when the frame was consumed (including when
 * it was silently dropped by filtering), or -1 when reception is
 * disabled or no descriptors are free (the net layer then queues the
 * packet for a later retry).
 */
static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[MIN_BUF_SIZE];
    struct iovec min_iov;
    uint8_t *filter_buf = iov->iov_base;
    size_t size = iov_size(iov, iovcnt);
    size_t iov_ofs = 0;
    size_t desc_offset;
    size_t desc_size;
    size_t total_size;

    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
        return -1;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        /* Copy into a local buffer, zero-pad, and bump the undersize
         * (RUC) statistics counter; continue with the padded copy. */
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        e1000x_inc_reg_if_not_full(s->mac_reg, RUC);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen.
         * Gather enough of the header into a contiguous buffer so the
         * filtering code below can inspect it linearly. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if (e1000x_is_oversized(s->mac_reg, size)) {
        return size;
    }

    /* Unicast/multicast/VLAN filtering; dropped frames still report
     * success so the net layer does not retry them. */
    if (!receive_filter(s, filter_buf, size)) {
        return size;
    }

    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
        /* Strip the 802.1Q tag: save the TCI for the descriptor's
         * "special" field and shift the MAC addresses over the tag. */
        vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            /* Header lives in min_buf; write the shifted addresses
             * back into the iov and skip past the consumed bytes. */
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    /* total_size includes the (possibly stripped) FCS length. */
    total_size = size + e1000x_fcs_len(s->mac_reg);
    if (!e1000_has_rxbufs(s, total_size)) {
            set_ics(s, 0, E1000_ICS_RXO);
            return -1;
    }
    /* Walk the RX ring, filling one descriptor buffer per iteration
     * until the whole frame (plus FCS) has been accounted for. */
    do {
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        if (desc.buffer_addr) {
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                /* DMA this descriptor's worth of payload, advancing
                 * through the iov fragments as they are exhausted. */
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        /* Write the updated descriptor (status/length) back to guest
         * memory so the driver sees DD/EOP. */
        pci_dma_write(d, base, &desc, sizeof(desc));

        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start ||
            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            set_ics(s, 0, E1000_ICS_RXO);
            return -1;
        }
    } while (desc_offset < total_size);

    e1000x_update_rx_total_stats(s->mac_reg, size, total_size);

    /* Raise RXT0, plus RXDMT0 when the ring has dropped below the
     * minimum-threshold configured via rxbuf_min_shift. */
    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
 990
 991static ssize_t
 992e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 993{
 994    const struct iovec iov = {
 995        .iov_base = (uint8_t *)buf,
 996        .iov_len = size
 997    };
 998
 999    return e1000_receive_iov(nc, &iov, 1);
1000}
1001
1002static uint32_t
1003mac_readreg(E1000State *s, int index)
1004{
1005    return s->mac_reg[index];
1006}
1007
1008static uint32_t
1009mac_low4_read(E1000State *s, int index)
1010{
1011    return s->mac_reg[index] & 0xf;
1012}
1013
1014static uint32_t
1015mac_low11_read(E1000State *s, int index)
1016{
1017    return s->mac_reg[index] & 0x7ff;
1018}
1019
1020static uint32_t
1021mac_low13_read(E1000State *s, int index)
1022{
1023    return s->mac_reg[index] & 0x1fff;
1024}
1025
1026static uint32_t
1027mac_low16_read(E1000State *s, int index)
1028{
1029    return s->mac_reg[index] & 0xffff;
1030}
1031
1032static uint32_t
1033mac_icr_read(E1000State *s, int index)
1034{
1035    uint32_t ret = s->mac_reg[ICR];
1036
1037    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1038    set_interrupt_cause(s, 0, 0);
1039    return ret;
1040}
1041
1042static uint32_t
1043mac_read_clr4(E1000State *s, int index)
1044{
1045    uint32_t ret = s->mac_reg[index];
1046
1047    s->mac_reg[index] = 0;
1048    return ret;
1049}
1050
1051static uint32_t
1052mac_read_clr8(E1000State *s, int index)
1053{
1054    uint32_t ret = s->mac_reg[index];
1055
1056    s->mac_reg[index] = 0;
1057    s->mac_reg[index-1] = 0;
1058    return ret;
1059}
1060
1061static void
1062mac_writereg(E1000State *s, int index, uint32_t val)
1063{
1064    uint32_t macaddr[2];
1065
1066    s->mac_reg[index] = val;
1067
1068    if (index == RA + 1) {
1069        macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1070        macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1071        qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1072    }
1073}
1074
1075static void
1076set_rdt(E1000State *s, int index, uint32_t val)
1077{
1078    s->mac_reg[index] = val & 0xffff;
1079    if (e1000_has_rxbufs(s, 1)) {
1080        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1081    }
1082}
1083
1084static void
1085set_16bit(E1000State *s, int index, uint32_t val)
1086{
1087    s->mac_reg[index] = val & 0xffff;
1088}
1089
1090static void
1091set_dlen(E1000State *s, int index, uint32_t val)
1092{
1093    s->mac_reg[index] = val & 0xfff80;
1094}
1095
1096static void
1097set_tctl(E1000State *s, int index, uint32_t val)
1098{
1099    s->mac_reg[index] = val;
1100    s->mac_reg[TDT] &= 0xffff;
1101    start_xmit(s);
1102}
1103
1104static void
1105set_icr(E1000State *s, int index, uint32_t val)
1106{
1107    DBGOUT(INTERRUPT, "set_icr %x\n", val);
1108    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1109}
1110
1111static void
1112set_imc(E1000State *s, int index, uint32_t val)
1113{
1114    s->mac_reg[IMS] &= ~val;
1115    set_ics(s, 0, 0);
1116}
1117
1118static void
1119set_ims(E1000State *s, int index, uint32_t val)
1120{
1121    s->mac_reg[IMS] |= val;
1122    set_ics(s, 0, 0);
1123}
1124
#define getreg(x)    [x] = mac_readreg
/* Per-register read dispatch table, indexed by register offset >> 2.
 * A NULL entry means the register is not readable (see
 * e1000_mmio_read).  Ranges use GCC designated-initializer syntax. */
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),

    /* Statistics counters: clear on read (64-bit high halves clear
     * both halves; see mac_read_clr8/mac_read_clr4). */
    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,
    /* FIFO head/tail registers: only the low bits are implemented. */
    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
    [RDFPC]   = mac_low13_read,
    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
    [TDFPC]   = mac_low13_read,
    [AIT]     = mac_low16_read,

    [CRCERRS ... MPC]   = &mac_readreg,
    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
    [FFLT ... FFLT+6]   = &mac_low11_read,
    [RA ... RA+31]      = &mac_readreg,
    [WUPM ... WUPM+31]  = &mac_readreg,
    [MTA ... MTA+127]   = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
    [FFMT ... FFMT+254] = &mac_low4_read,
    [FFVT ... FFVT+254] = &mac_readreg,
    [PBM ... PBM+16383] = &mac_readreg,
};
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1179
#define putreg(x)    [x] = mac_writereg
/* Per-register write dispatch table, indexed by register offset >> 2.
 * A NULL entry means the register is read-only or unimplemented (see
 * e1000_mmio_write). */
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
    putreg(WUS),      putreg(AIT),

    /* Registers whose writes have side effects get dedicated
     * handlers (ring kicks, interrupt recomputation, masking). */
    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
    [ITR]    = set_16bit,

    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
    [FFLT ... FFLT+6]   = &mac_writereg,
    [RA ... RA+31]      = &mac_writereg,
    [WUPM ... WUPM+31]  = &mac_writereg,
    [MTA ... MTA+127]   = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
    [PBM ... PBM+16383] = &mac_writereg,
};

enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1209
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implenented */
static const uint8_t mac_reg_access[0x8000] = {
    /* Interrupt-mitigation registers: only live when the "mitigation"
     * compat flag (E1000_FLAG_MIT) is enabled. */
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    /* Extra MAC registers: only live when the "extra_mac_registers"
     * compat flag (E1000_FLAG_MAC) is enabled. */
    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    /* FIFO registers are stored but not modeled; accesses emit a
     * "not fully implemented" debug warning. */
    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1262
1263static void
1264e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1265                 unsigned size)
1266{
1267    E1000State *s = opaque;
1268    unsigned int index = (addr & 0x1ffff) >> 2;
1269
1270    if (index < NWRITEOPS && macreg_writeops[index]) {
1271        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1272            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1273            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1274                DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1275                       "It is not fully implemented.\n", index<<2);
1276            }
1277            macreg_writeops[index](s, index, val);
1278        } else {    /* "flag needed" bit is set, but the flag is not active */
1279            DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1280                   index<<2);
1281        }
1282    } else if (index < NREADOPS && macreg_readops[index]) {
1283        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1284               index<<2, val);
1285    } else {
1286        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1287               index<<2, val);
1288    }
1289}
1290
1291static uint64_t
1292e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1293{
1294    E1000State *s = opaque;
1295    unsigned int index = (addr & 0x1ffff) >> 2;
1296
1297    if (index < NREADOPS && macreg_readops[index]) {
1298        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1299            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1300            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1301                DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1302                       "It is not fully implemented.\n", index<<2);
1303            }
1304            return macreg_readops[index](s, index);
1305        } else {    /* "flag needed" bit is set, but the flag is not active */
1306            DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1307                   index<<2);
1308        }
1309    } else {
1310        DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1311    }
1312    return 0;
1313}
1314
/* MMIO register BAR: all guest accesses are presented to the device
 * as aligned 32-bit operations regardless of the actual access size. */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1324
1325static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1326                              unsigned size)
1327{
1328    E1000State *s = opaque;
1329
1330    (void)s;
1331    return 0;
1332}
1333
1334static void e1000_io_write(void *opaque, hwaddr addr,
1335                           uint64_t val, unsigned size)
1336{
1337    E1000State *s = opaque;
1338
1339    (void)s;
1340}
1341
/* I/O-port BAR: stub handlers, see e1000_io_read/e1000_io_write. */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1347
/* VMState field-exists predicate: true only for version-1 streams
 * (used to skip fields that were dropped after v1). */
static bool is_version_1(void *opaque, int version_id)
{
    return version_id == 1;
}
1352
/* Migration pre_save hook: flush timer-deferred state and choose
 * which TX offload property set goes into the main vmstate struct. */
static int e1000_pre_save(void *opaque)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /* If the mitigation timer is active, emulate a timeout now. */
    if (s->mit_timer_on) {
        e1000_mit_timer(s);
    }

    /*
     * If link is down and auto-negotiation is supported and ongoing,
     * complete auto-negotiation immediately. This allows us to look
     * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
     */
    if (nc->link_down && have_autoneg(s)) {
        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
    }

    /* Decide which set of props to migrate in the main structure */
    if (chkflag(TSO) || !s->use_tso_for_migration) {
        /* Either we're migrating with the extra subsection, in which
         * case the mig_props is always 'props' OR
         * we've not got the subsection, but 'props' was the last
         * updated.
         */
        s->mig_props = s->tx.props;
    } else {
        /* We're not using the subsection, and 'tso_props' was
         * the last updated.
         */
        s->mig_props = s->tx.tso_props;
    }
    return 0;
}
1388
/* Migration post_load hook: reconstruct state that is not (or cannot
 * be) carried in the stream — mitigation timers, link status, and the
 * TX offload property sets. */
static int e1000_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /* Without the mitigation compat flag the mitigation registers
     * must read as zero on the destination. */
    if (!chkflag(MIT)) {
        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
            s->mac_reg[TADV] = 0;
        s->mit_irq_level = false;
    }
    s->mit_ide = 0;
    s->mit_timer_on = false;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in mac_reg[STATUS].
     * Alternatively, restart link negotiation if it was in progress. */
    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;

    if (have_autoneg(s) &&
        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
        nc->link_down = false;
        timer_mod(s->autoneg_timer,
                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }

    s->tx.props = s->mig_props;
    if (!s->received_tx_tso) {
        /* We received only one set of offload data (tx.props)
         * and haven't got tx.tso_props.  The best we can do
         * is dupe the data.
         */
        s->tx.tso_props = s->mig_props;
    }
    return 0;
}
1424
1425static int e1000_tx_tso_post_load(void *opaque, int version_id)
1426{
1427    E1000State *s = opaque;
1428    s->received_tx_tso = true;
1429    return 0;
1430}
1431
/* Migrate the mitigation subsection only when the "mitigation"
 * compat flag is enabled. */
static bool e1000_mit_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(MIT);
}
1438
/* Migrate the full register file only when the "extra_mac_registers"
 * compat flag is enabled. */
static bool e1000_full_mac_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(MAC);
}
1445
/* Migrate the separate TSO-props subsection only when the
 * "migrate_tso_props" compat flag is enabled. */
static bool e1000_tso_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(TSO);
}
1452
/* Optional subsection: interrupt-mitigation registers and IRQ level. */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1467
/* Optional subsection: the complete 0x8000-entry MAC register file. */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1478
/* Optional subsection: the TSO context, kept separately from the
 * non-TSO offload context carried in the main section (mig_props). */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1500
/* Main migration description.  Field order and types are part of the
 * wire format — never reorder or retype existing entries. */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1585
1586/*
1587 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1588 * Note: A valid DevId will be inserted during pci_e1000_init().
1589 */
1590static const uint16_t e1000_eeprom_template[64] = {
1591    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1592    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1593    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1594    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1595    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1596    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1597    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1598    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1599};
1600
1601/* PCI interface */
1602
1603static void
1604e1000_mmio_setup(E1000State *d)
1605{
1606    int i;
1607    const uint32_t excluded_regs[] = {
1608        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1609        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1610    };
1611
1612    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1613                          "e1000-mmio", PNPMMIO_SIZE);
1614    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1615    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1616        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1617                                     excluded_regs[i+1] - excluded_regs[i] - 4);
1618    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1619}
1620
1621static void
1622pci_e1000_uninit(PCIDevice *dev)
1623{
1624    E1000State *d = E1000(dev);
1625
1626    timer_del(d->autoneg_timer);
1627    timer_free(d->autoneg_timer);
1628    timer_del(d->mit_timer);
1629    timer_free(d->mit_timer);
1630    qemu_del_nic(d->nic);
1631}
1632
/* Callbacks wiring this device into QEMU's net layer. */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .link_status_changed = e1000_set_link_status,
};
1641
1642static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1643                                uint32_t val, int len)
1644{
1645    E1000State *s = E1000(pci_dev);
1646
1647    pci_default_write_config(pci_dev, address, val, len);
1648
1649    if (range_covers_byte(address, len, PCI_COMMAND) &&
1650        (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1651        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1652    }
1653}
1654
/* PCI realize: set up config space, BARs, EEPROM contents, the NIC
 * backend and the device's timers. */
static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
{
    DeviceState *dev = DEVICE(pci_dev);
    E1000State *d = E1000(pci_dev);
    uint8_t *pci_conf;
    uint8_t *macaddr;

    /* Hook config writes so PCI_COMMAND changes can flush queued RX. */
    pci_dev->config_write = e1000_write_config;

    pci_conf = pci_dev->config;

    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;

    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */

    e1000_mmio_setup(d);

    /* BAR 0: MMIO registers; BAR 1: (stub) I/O ports. */
    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);

    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);

    qemu_macaddr_default_if_unset(&d->conf.macaddr);
    macaddr = d->conf.macaddr.a;

    /* Fill the EEPROM image with the MAC address and the per-variant
     * device ID, and fix up its checksum. */
    e1000x_core_prepare_eeprom(d->eeprom_data,
                               e1000_eeprom_template,
                               sizeof(e1000_eeprom_template),
                               PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
                               macaddr);

    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
                          object_get_typename(OBJECT(d)), dev->id, d);

    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);

    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
}
1694
1695static void qdev_e1000_reset(DeviceState *dev)
1696{
1697    E1000State *d = E1000(dev);
1698    e1000_reset(d);
1699}
1700
/* User-visible device properties.  The compat-flag bits default to on
 * and are cleared by machine-type compat settings for old machines. */
static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
                    compat_flags, E1000_FLAG_MAC_BIT, true),
    DEFINE_PROP_BIT("migrate_tso_props", E1000State,
                    compat_flags, E1000_FLAG_TSO_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
1713
/* Per-variant parameters used as class_data when registering each
 * concrete e1000 device type (see e1000_devices[] below). */
typedef struct E1000Info {
    const char *name;       /* QOM type name, e.g. "e1000" */
    uint16_t   device_id;   /* PCI device ID */
    uint8_t    revision;    /* PCI revision ID */
    uint16_t   phy_id2;     /* value reported in PHY ID2 register */
} E1000Info;
1720
1721static void e1000_class_init(ObjectClass *klass, void *data)
1722{
1723    DeviceClass *dc = DEVICE_CLASS(klass);
1724    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1725    E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1726    const E1000Info *info = data;
1727
1728    k->realize = pci_e1000_realize;
1729    k->exit = pci_e1000_uninit;
1730    k->romfile = "efi-e1000.rom";
1731    k->vendor_id = PCI_VENDOR_ID_INTEL;
1732    k->device_id = info->device_id;
1733    k->revision = info->revision;
1734    e->phy_id2 = info->phy_id2;
1735    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1736    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1737    dc->desc = "Intel Gigabit Ethernet";
1738    dc->reset = qdev_e1000_reset;
1739    dc->vmsd = &vmstate_e1000;
1740    dc->props = e1000_properties;
1741}
1742
1743static void e1000_instance_init(Object *obj)
1744{
1745    E1000State *n = E1000(obj);
1746    device_add_bootindex_property(obj, &n->conf.bootindex,
1747                                  "bootindex", "/ethernet-phy@0",
1748                                  DEVICE(n), NULL);
1749}
1750
/*
 * Abstract base type shared by all e1000 variants; concrete types are
 * registered from e1000_register_types() with this as their parent.
 * instance_init here runs for every derived device instance.
 */
static const TypeInfo e1000_base_info = {
    .name          = TYPE_E1000_BASE,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(E1000State),
    .instance_init = e1000_instance_init,
    .class_size    = sizeof(E1000BaseClass),
    .abstract      = true,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};
1763
/* Table of emulated 8254x variants; each entry becomes a QOM type. */
static const E1000Info e1000_devices[] = {
    {
        /* Default model: 82540EM, the classic "e1000". */
        .name      = "e1000",
        .device_id = E1000_DEV_ID_82540EM,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
    {
        .name      = "e1000-82544gc",
        .device_id = E1000_DEV_ID_82544GC_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_82544x,
    },
    {
        .name      = "e1000-82545em",
        .device_id = E1000_DEV_ID_82545EM_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
};
1784
1785static void e1000_register_types(void)
1786{
1787    int i;
1788
1789    type_register_static(&e1000_base_info);
1790    for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1791        const E1000Info *info = &e1000_devices[i];
1792        TypeInfo type_info = {};
1793
1794        type_info.name = info->name;
1795        type_info.parent = TYPE_E1000_BASE;
1796        type_info.class_data = (void *)info;
1797        type_info.class_init = e1000_class_init;
1798        type_info.instance_init = e1000_instance_init;
1799
1800        type_register(&type_info);
1801    }
1802}
1803
/* Hook type registration into QEMU's module init machinery. */
type_init(e1000_register_types)
1805