qemu/hw/net/e1000.c
   1/*
   2 * QEMU e1000 emulation
   3 *
   4 * Software developer's manual:
   5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
   6 *
   7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
   8 * Copyright (c) 2008 Qumranet
   9 * Based on work done by:
  10 * Copyright (c) 2007 Dan Aloni
  11 * Copyright (c) 2004 Antony T Curtis
  12 *
  13 * This library is free software; you can redistribute it and/or
  14 * modify it under the terms of the GNU Lesser General Public
  15 * License as published by the Free Software Foundation; either
  16 * version 2.1 of the License, or (at your option) any later version.
  17 *
  18 * This library is distributed in the hope that it will be useful,
  19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 * Lesser General Public License for more details.
  22 *
  23 * You should have received a copy of the GNU Lesser General Public
  24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  25 */
  26
  27
  28#include "qemu/osdep.h"
  29#include "hw/pci/pci.h"
  30#include "hw/qdev-properties.h"
  31#include "migration/vmstate.h"
  32#include "net/eth.h"
  33#include "net/net.h"
  34#include "net/checksum.h"
  35#include "sysemu/sysemu.h"
  36#include "sysemu/dma.h"
  37#include "qemu/iov.h"
  38#include "qemu/module.h"
  39#include "qemu/range.h"
  40
  41#include "e1000x_common.h"
  42#include "trace.h"
  43#include "qom/object.h"
  44
  45static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
  46
  47/* #define E1000_DEBUG */
  48
  49#ifdef E1000_DEBUG
  50enum {
  51    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
  52    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
  53    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
  54    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
  55};
  56#define DBGBIT(x)    (1<<DEBUG_##x)
  57static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
  58
  59#define DBGOUT(what, fmt, ...) do { \
  60    if (debugflags & DBGBIT(what)) \
  61        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
  62    } while (0)
  63#else
  64#define DBGOUT(what, fmt, ...) do {} while (0)
  65#endif
  66
  67#define IOPORT_SIZE       0x40
  68#define PNPMMIO_SIZE      0x20000
  69#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
  70
  71#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
  72
  73/*
  74 * HW models:
  75 *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
  76 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
  77 *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
  78 *  Others never tested
  79 */
  80
  81struct E1000State_st {
  82    /*< private >*/
  83    PCIDevice parent_obj;
  84    /*< public >*/
  85
  86    NICState *nic;
  87    NICConf conf;
  88    MemoryRegion mmio;
  89    MemoryRegion io;
  90
  91    uint32_t mac_reg[0x8000];
  92    uint16_t phy_reg[0x20];
  93    uint16_t eeprom_data[64];
  94
  95    uint32_t rxbuf_size;
  96    uint32_t rxbuf_min_shift;
  97    struct e1000_tx {
  98        unsigned char header[256];
  99        unsigned char vlan_header[4];
 100        /* Fields vlan and data must not be reordered or separated. */
 101        unsigned char vlan[4];
 102        unsigned char data[0x10000];
 103        uint16_t size;
 104        unsigned char vlan_needed;
 105        unsigned char sum_needed;
 106        bool cptse;
 107        e1000x_txd_props props;
 108        e1000x_txd_props tso_props;
 109        uint16_t tso_frames;
 110        bool busy;
 111    } tx;
 112
 113    struct {
 114        uint32_t val_in;    /* shifted in from guest driver */
 115        uint16_t bitnum_in;
 116        uint16_t bitnum_out;
 117        uint16_t reading;
 118        uint32_t old_eecd;
 119    } eecd_state;
 120
 121    QEMUTimer *autoneg_timer;
 122
 123    QEMUTimer *mit_timer;      /* Mitigation timer. */
 124    bool mit_timer_on;         /* Mitigation timer is running. */
 125    bool mit_irq_level;        /* Tracks interrupt pin level. */
 126    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
 127
 128    QEMUTimer *flush_queue_timer;
 129
 130/* Compatibility flags for migration to/from qemu 1.3.0 and older */
 131#define E1000_FLAG_AUTONEG_BIT 0
 132#define E1000_FLAG_MIT_BIT 1
 133#define E1000_FLAG_MAC_BIT 2
 134#define E1000_FLAG_TSO_BIT 3
 135#define E1000_FLAG_VET_BIT 4
 136#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
 137#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
 138#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
 139#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
 140#define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)
 141
 142    uint32_t compat_flags;
 143    bool received_tx_tso;
 144    bool use_tso_for_migration;
 145    e1000x_txd_props mig_props;
 146};
 147typedef struct E1000State_st E1000State;
 148
 149#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
 150
 151struct E1000BaseClass {
 152    PCIDeviceClass parent_class;
 153    uint16_t phy_id2;
 154};
 155typedef struct E1000BaseClass E1000BaseClass;
 156
 157#define TYPE_E1000_BASE "e1000-base"
 158
 159DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
 160                     E1000, TYPE_E1000_BASE)
 161
 162
 163static void
 164e1000_link_up(E1000State *s)
 165{
 166    e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
 167
 168    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 169    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 170}
 171
 172static void
 173e1000_autoneg_done(E1000State *s)
 174{
 175    e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
 176
 177    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 178    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 179}
 180
 181static bool
 182have_autoneg(E1000State *s)
 183{
 184    return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
 185}
 186
 187static void
 188set_phy_ctrl(E1000State *s, int index, uint16_t val)
 189{
 190    /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
 191    s->phy_reg[PHY_CTRL] = val & ~(0x3f |
 192                                   MII_CR_RESET |
 193                                   MII_CR_RESTART_AUTO_NEG);
 194
 195    /*
 196     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
 197     * migrate during auto negotiation, after migration the link will be
 198     * down.
 199     */
 200    if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
 201        e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
 202    }
 203}
 204
 205static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
 206    [PHY_CTRL] = set_phy_ctrl,
 207};
 208
 209enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
 210
 211enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
 212static const char phy_regcap[0x20] = {
 213    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
 214    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
 215    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
 216    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
 217    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
 218    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
 219    [PHY_AUTONEG_EXP] = PHY_R,
 220};
 221
  222/* PHY_ID2 documented in 8254x_GBe_SDM.pdf, p. 250 */
 223static const uint16_t phy_reg_init[] = {
 224    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
 225                   MII_CR_FULL_DUPLEX |
 226                   MII_CR_AUTO_NEG_EN,
 227
 228    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
 229                   MII_SR_LINK_STATUS |   /* link initially up */
 230                   MII_SR_AUTONEG_CAPS |
 231                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
 232                   MII_SR_PREAMBLE_SUPPRESS |
 233                   MII_SR_EXTENDED_STATUS |
 234                   MII_SR_10T_HD_CAPS |
 235                   MII_SR_10T_FD_CAPS |
 236                   MII_SR_100X_HD_CAPS |
 237                   MII_SR_100X_FD_CAPS,
 238
 239    [PHY_ID1] = 0x141,
 240    /* [PHY_ID2] configured per DevId, from e1000_reset() */
 241    [PHY_AUTONEG_ADV] = 0xde1,
 242    [PHY_LP_ABILITY] = 0x1e0,
 243    [PHY_1000T_CTRL] = 0x0e00,
 244    [PHY_1000T_STATUS] = 0x3c00,
 245    [M88E1000_PHY_SPEC_CTRL] = 0x360,
 246    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
 247    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
 248};
 249
 250static const uint32_t mac_reg_init[] = {
 251    [PBA]     = 0x00100030,
 252    [LEDCTL]  = 0x602,
 253    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
 254                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
 255    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
 256                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
 257                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
 258                E1000_STATUS_LU,
 259    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
 260                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
 261                E1000_MANC_RMCP_EN,
 262};
 263
 264/* Helper function, *curr == 0 means the value is not set */
 265static inline void
 266mit_update_delay(uint32_t *curr, uint32_t value)
 267{
 268    if (value && (*curr == 0 || value < *curr)) {
 269        *curr = value;
 270    }
 271}
 272
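/*
 * Fold the new cause set into ICR/ICS and drive the PCI INTx line.
 * A rising edge is postponed while the interrupt mitigation timer is
 * running; when mitigation is enabled, the delay is computed from
 * TADV, RADV and ITR and the mitigation timer is armed.
 */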
 273static void
 274set_interrupt_cause(E1000State *s, int index, uint32_t val)
 275{
 276    PCIDevice *d = PCI_DEVICE(s);
 277    uint32_t pending_ints;
 278    uint32_t mit_delay;
 279
 280    s->mac_reg[ICR] = val;
 281
 282    /*
 283     * Make sure ICR and ICS registers have the same value.
 284     * The spec says that the ICS register is write-only.  However in practice,
 285     * on real hardware ICS is readable, and for reads it has the same value as
 286     * ICR (except that ICS does not have the clear on read behaviour of ICR).
 287     *
 288     * The VxWorks PRO/1000 driver uses this behaviour.
 289     */
 290    s->mac_reg[ICS] = val;
 291
 292    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
 293    if (!s->mit_irq_level && pending_ints) {
 294        /*
  295         * Here we detect a potential rising edge. We postpone raising the
 296         * interrupt line if we are inside the mitigation delay window
 297         * (s->mit_timer_on == 1).
 298         * We provide a partial implementation of interrupt mitigation,
 299         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
 300         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
 301         * RADV; relative timers based on TIDV and RDTR are not implemented.
 302         */
 303        if (s->mit_timer_on) {
 304            return;
 305        }
 306        if (chkflag(MIT)) {
 307            /* Compute the next mitigation delay according to pending
 308             * interrupts and the current values of RADV (provided
 309             * RDTR!=0), TADV and ITR.
 310             * Then rearm the timer.
 311             */
 312            mit_delay = 0;
 313            if (s->mit_ide &&
 314                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
 315                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
 316            }
 317            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
 318                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
 319            }
 320            mit_update_delay(&mit_delay, s->mac_reg[ITR]);
 321
 322            /*
  323             * According to the e1000 spec, the Ethernet controller guarantees
  324             * a maximum observable interrupt rate of 7813 interrupts/sec, so
  325             * mit_delay is clamped to a minimum of 500 (500 * 256 ns = 128 us,
  326             * i.e. roughly 7813 interrupts per second).
 327             */
 328            mit_delay = (mit_delay < 500) ? 500 : mit_delay;
 329
 330            s->mit_timer_on = 1;
 331            timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 332                      mit_delay * 256);
 333            s->mit_ide = 0;
 334        }
 335    }
 336
 337    s->mit_irq_level = (pending_ints != 0);
 338    pci_set_irq(d, s->mit_irq_level);
 339}
 340
 341static void
 342e1000_mit_timer(void *opaque)
 343{
 344    E1000State *s = opaque;
 345
 346    s->mit_timer_on = 0;
 347    /* Call set_interrupt_cause to update the irq level (if necessary). */
 348    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
 349}
 350
 351static void
 352set_ics(E1000State *s, int index, uint32_t val)
 353{
 354    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
 355        s->mac_reg[IMS]);
 356    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
 357}
 358
 359static void
 360e1000_autoneg_timer(void *opaque)
 361{
 362    E1000State *s = opaque;
 363    if (!qemu_get_queue(s->nic)->link_down) {
 364        e1000_autoneg_done(s);
 365        set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
 366    }
 367}
 368
 369static bool e1000_vet_init_need(void *opaque)
 370{
 371    E1000State *s = opaque;
 372
 373    return chkflag(VET);
 374}
 375
 376static void e1000_reset(void *opaque)
 377{
 378    E1000State *d = opaque;
 379    E1000BaseClass *edc = E1000_GET_CLASS(d);
 380    uint8_t *macaddr = d->conf.macaddr.a;
 381
 382    timer_del(d->autoneg_timer);
 383    timer_del(d->mit_timer);
 384    timer_del(d->flush_queue_timer);
 385    d->mit_timer_on = 0;
 386    d->mit_irq_level = 0;
 387    d->mit_ide = 0;
 388    memset(d->phy_reg, 0, sizeof d->phy_reg);
 389    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
 390    d->phy_reg[PHY_ID2] = edc->phy_id2;
 391    memset(d->mac_reg, 0, sizeof d->mac_reg);
 392    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
 393    d->rxbuf_min_shift = 1;
 394    memset(&d->tx, 0, sizeof d->tx);
 395
 396    if (qemu_get_queue(d->nic)->link_down) {
 397        e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
 398    }
 399
 400    e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
 401
 402    if (e1000_vet_init_need(d)) {
 403        d->mac_reg[VET] = ETH_P_VLAN;
 404    }
 405}
 406
 407static void
 408set_ctrl(E1000State *s, int index, uint32_t val)
 409{
 410    /* RST is self clearing */
 411    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
 412}
 413
 414static void
 415e1000_flush_queue_timer(void *opaque)
 416{
 417    E1000State *s = opaque;
 418
 419    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 420}
 421
 422static void
 423set_rx_control(E1000State *s, int index, uint32_t val)
 424{
 425    s->mac_reg[RCTL] = val;
 426    s->rxbuf_size = e1000x_rxbufsize(val);
 427    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
 428    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
 429           s->mac_reg[RCTL]);
 430    timer_mod(s->flush_queue_timer,
 431              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
 432}
 433
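/*
 * Emulate an MDIC (MDIO) transaction: accesses to PHY address 1 are
 * routed to phy_reg[], honouring the per-register read/write
 * capabilities; anything else is flagged with E1000_MDIC_ERROR.  The
 * MDAC interrupt is raised when the guest requested it.
 */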
 434static void
 435set_mdic(E1000State *s, int index, uint32_t val)
 436{
 437    uint32_t data = val & E1000_MDIC_DATA_MASK;
 438    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
 439
 440    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
 441        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
 442    else if (val & E1000_MDIC_OP_READ) {
 443        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
 444        if (!(phy_regcap[addr] & PHY_R)) {
 445            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
 446            val |= E1000_MDIC_ERROR;
 447        } else
 448            val = (val ^ data) | s->phy_reg[addr];
 449    } else if (val & E1000_MDIC_OP_WRITE) {
 450        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
 451        if (!(phy_regcap[addr] & PHY_W)) {
 452            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
 453            val |= E1000_MDIC_ERROR;
 454        } else {
 455            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
 456                phyreg_writeops[addr](s, index, data);
 457            } else {
 458                s->phy_reg[addr] = data;
 459            }
 460        }
 461    }
 462    s->mac_reg[MDIC] = val | E1000_MDIC_READY;
 463
 464    if (val & E1000_MDIC_INT_EN) {
 465        set_ics(s, 0, E1000_ICR_MDAC);
 466    }
 467}
 468
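/*
 * Return the guest-visible EECD value: EEPROM present and grant are
 * always reported, and the DO bit reflects the data bit currently
 * being shifted out (it reads as 1 whenever no read is in progress).
 */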
 469static uint32_t
 470get_eecd(E1000State *s, int index)
 471{
 472    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
 473
 474    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
 475           s->eecd_state.bitnum_out, s->eecd_state.reading);
 476    if (!s->eecd_state.reading ||
 477        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
 478          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
 479        ret |= E1000_EECD_DO;
 480    return ret;
 481}
 482
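/*
 * Bit-bang access to the Microwire serial EEPROM: DI is sampled on the
 * rising edge of SK and the output bit counter advances on the falling
 * edge.  Once a 9-bit command (start bit, opcode and 6-bit word
 * address) has been shifted in, a read command starts clocking the
 * addressed word out through EECD_DO.
 */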
 483static void
 484set_eecd(E1000State *s, int index, uint32_t val)
 485{
 486    uint32_t oldval = s->eecd_state.old_eecd;
 487
 488    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
 489            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
 490    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
 491        return;
 492    }
 493    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
 494        s->eecd_state.val_in = 0;
 495        s->eecd_state.bitnum_in = 0;
 496        s->eecd_state.bitnum_out = 0;
 497        s->eecd_state.reading = 0;
 498    }
 499    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
 500        return;
 501    }
 502    if (!(E1000_EECD_SK & val)) {               /* falling edge */
 503        s->eecd_state.bitnum_out++;
 504        return;
 505    }
 506    s->eecd_state.val_in <<= 1;
 507    if (val & E1000_EECD_DI)
 508        s->eecd_state.val_in |= 1;
 509    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
 510        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
 511        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
 512            EEPROM_READ_OPCODE_MICROWIRE);
 513    }
 514    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
 515           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
 516           s->eecd_state.reading);
 517}
 518
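/*
 * EERD-based EEPROM read: once the guest sets the START bit, return the
 * addressed EEPROM word together with the DONE bit (out-of-range
 * addresses complete with no data).
 */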
 519static uint32_t
 520flash_eerd_read(E1000State *s, int x)
 521{
 522    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
 523
 524    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
 525        return (s->mac_reg[EERD]);
 526
 527    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
 528        return (E1000_EEPROM_RW_REG_DONE | r);
 529
 530    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
 531           E1000_EEPROM_RW_REG_DONE | r);
 532}
 533
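/*
 * Compute the Internet checksum over data[css..cse] (or to the end of
 * the buffer when cse is 0) and store it big-endian at offset sloc.
 */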
 534static void
 535putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
 536{
 537    uint32_t sum;
 538
 539    if (cse && cse < n)
 540        n = cse + 1;
 541    if (sloc < n-1) {
 542        sum = net_checksum_add(n-css, data+css);
 543        stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
 544    }
 545}
 546
 547static inline void
 548inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
 549{
 550    if (!memcmp(arr, bcast, sizeof bcast)) {
 551        e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
 552    } else if (arr[0] & 1) {
 553        e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
 554    }
 555}
 556
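/*
 * Hand a finished frame to the network backend, or loop it back when
 * PHY loopback is enabled, and update the transmit statistics counters.
 */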
 557static void
 558e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
 559{
 560    static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
 561                                    PTC1023, PTC1522 };
 562
 563    NetClientState *nc = qemu_get_queue(s->nic);
 564    if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
 565        qemu_receive_packet(nc, buf, size);
 566    } else {
 567        qemu_send_packet(nc, buf, size);
 568    }
 569    inc_tx_bcast_or_mcast_count(s, buf);
 570    e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
 571}
 572
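/*
 * Transmit one segment from tp->data.  For TSO the IP/TCP/UDP headers
 * are fixed up first (lengths, IP identification, TCP sequence number
 * and flags, pseudo-header checksum); then the requested checksums and
 * an optional VLAN tag are inserted before the frame is sent.
 */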
 573static void
 574xmit_seg(E1000State *s)
 575{
 576    uint16_t len;
 577    unsigned int frames = s->tx.tso_frames, css, sofar;
 578    struct e1000_tx *tp = &s->tx;
 579    struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
 580
 581    if (tp->cptse) {
 582        css = props->ipcss;
 583        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
 584               frames, tp->size, css);
 585        if (props->ip) {    /* IPv4 */
 586            stw_be_p(tp->data+css+2, tp->size - css);
 587            stw_be_p(tp->data+css+4,
 588                     lduw_be_p(tp->data + css + 4) + frames);
 589        } else {         /* IPv6 */
 590            stw_be_p(tp->data+css+4, tp->size - css);
 591        }
 592        css = props->tucss;
 593        len = tp->size - css;
 594        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
 595        if (props->tcp) {
 596            sofar = frames * props->mss;
 597            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
 598            if (props->paylen - sofar > props->mss) {
 599                tp->data[css + 13] &= ~9;    /* PSH, FIN */
 600            } else if (frames) {
 601                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
 602            }
 603        } else {    /* UDP */
 604            stw_be_p(tp->data+css+4, len);
 605        }
 606        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
 607            unsigned int phsum;
 608            // add pseudo-header length before checksum calculation
 609            void *sp = tp->data + props->tucso;
 610
 611            phsum = lduw_be_p(sp) + len;
 612            phsum = (phsum >> 16) + (phsum & 0xffff);
 613            stw_be_p(sp, phsum);
 614        }
 615        tp->tso_frames++;
 616    }
 617
 618    if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
 619        putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
 620    }
 621    if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
 622        putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
 623    }
 624    if (tp->vlan_needed) {
 625        memmove(tp->vlan, tp->data, 4);
 626        memmove(tp->data, tp->data + 4, 8);
 627        memcpy(tp->data + 8, tp->vlan_header, 4);
 628        e1000_send_packet(s, tp->vlan, tp->size + 4);
 629    } else {
 630        e1000_send_packet(s, tp->data, tp->size);
 631    }
 632
 633    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
 634    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
 635    s->mac_reg[GPTC] = s->mac_reg[TPT];
 636    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
 637    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
 638}
 639
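/*
 * Process a single transmit descriptor.  Context descriptors update the
 * stored offload parameters; data and legacy descriptors are DMAed into
 * tp->data and, when TSO is in effect, flushed as MSS-sized segments
 * whenever the buffer fills up.  End-of-packet transmits the final
 * segment and resets the per-packet state.
 */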
 640static void
 641process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
 642{
 643    PCIDevice *d = PCI_DEVICE(s);
 644    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
 645    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
 646    unsigned int split_size = txd_lower & 0xffff, bytes, sz;
 647    unsigned int msh = 0xfffff;
 648    uint64_t addr;
 649    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
 650    struct e1000_tx *tp = &s->tx;
 651
 652    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
 653    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
 654        if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
 655            e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
 656            s->use_tso_for_migration = 1;
 657            tp->tso_frames = 0;
 658        } else {
 659            e1000x_read_tx_ctx_descr(xp, &tp->props);
 660            s->use_tso_for_migration = 0;
 661        }
 662        return;
 663    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
 664        // data descriptor
 665        if (tp->size == 0) {
 666            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
 667        }
 668        tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
 669    } else {
 670        // legacy descriptor
 671        tp->cptse = 0;
 672    }
 673
 674    if (e1000x_vlan_enabled(s->mac_reg) &&
 675        e1000x_is_vlan_txd(txd_lower) &&
 676        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
 677        tp->vlan_needed = 1;
 678        stw_be_p(tp->vlan_header,
 679                      le16_to_cpu(s->mac_reg[VET]));
 680        stw_be_p(tp->vlan_header + 2,
 681                      le16_to_cpu(dp->upper.fields.special));
 682    }
 683
 684    addr = le64_to_cpu(dp->buffer_addr);
 685    if (tp->cptse) {
 686        msh = tp->tso_props.hdr_len + tp->tso_props.mss;
 687        do {
 688            bytes = split_size;
 689            if (tp->size >= msh) {
 690                goto eop;
 691            }
 692            if (tp->size + bytes > msh)
 693                bytes = msh - tp->size;
 694
 695            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
 696            pci_dma_read(d, addr, tp->data + tp->size, bytes);
 697            sz = tp->size + bytes;
 698            if (sz >= tp->tso_props.hdr_len
 699                && tp->size < tp->tso_props.hdr_len) {
 700                memmove(tp->header, tp->data, tp->tso_props.hdr_len);
 701            }
 702            tp->size = sz;
 703            addr += bytes;
 704            if (sz == msh) {
 705                xmit_seg(s);
 706                memmove(tp->data, tp->header, tp->tso_props.hdr_len);
 707                tp->size = tp->tso_props.hdr_len;
 708            }
 709            split_size -= bytes;
 710        } while (bytes && split_size);
 711    } else {
 712        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
 713        pci_dma_read(d, addr, tp->data + tp->size, split_size);
 714        tp->size += split_size;
 715    }
 716
 717eop:
 718    if (!(txd_lower & E1000_TXD_CMD_EOP))
 719        return;
 720    if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
 721        xmit_seg(s);
 722    }
 723    tp->tso_frames = 0;
 724    tp->sum_needed = 0;
 725    tp->vlan_needed = 0;
 726    tp->size = 0;
 727    tp->cptse = 0;
 728}
 729
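/*
 * If the descriptor requested status reporting (RS/RPS), write the DD
 * status back to guest memory and return the TXDW interrupt cause.
 */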
 730static uint32_t
 731txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
 732{
 733    PCIDevice *d = PCI_DEVICE(s);
 734    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
 735
 736    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
 737        return 0;
 738    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
 739                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
 740    dp->upper.data = cpu_to_le32(txd_upper);
 741    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
 742                  &dp->upper, sizeof(dp->upper));
 743    return E1000_ICR_TXDW;
 744}
 745
 746static uint64_t tx_desc_base(E1000State *s)
 747{
 748    uint64_t bah = s->mac_reg[TDBAH];
 749    uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
 750
 751    return (bah << 32) + bal;
 752}
 753
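/*
 * Walk the transmit ring from TDH towards TDT, processing each
 * descriptor and writing back its status; tx.busy guards against
 * re-entry.  The accumulated interrupt causes are raised at the end.
 */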
 754static void
 755start_xmit(E1000State *s)
 756{
 757    PCIDevice *d = PCI_DEVICE(s);
 758    dma_addr_t base;
 759    struct e1000_tx_desc desc;
 760    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
 761
 762    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
 763        DBGOUT(TX, "tx disabled\n");
 764        return;
 765    }
 766
 767    if (s->tx.busy) {
 768        return;
 769    }
 770    s->tx.busy = true;
 771
 772    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
 773        base = tx_desc_base(s) +
 774               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
 775        pci_dma_read(d, base, &desc, sizeof(desc));
 776
 777        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
 778               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
 779               desc.upper.data);
 780
 781        process_tx_desc(s, &desc);
 782        cause |= txdesc_writeback(s, base, &desc);
 783
 784        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
 785            s->mac_reg[TDH] = 0;
 786        /*
  787         * The following can happen only if guest software assigns
  788         * bogus values to TDT/TDLEN;
  789         * there's nothing sensible we can do about it.
 790         */
 791        if (s->mac_reg[TDH] == tdh_start ||
 792            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
 793            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
 794                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
 795            break;
 796        }
 797    }
 798    s->tx.busy = false;
 799    set_ics(s, 0, cause);
 800}
 801
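/*
 * Return nonzero if the incoming frame should be accepted after the
 * VLAN, promiscuous, broadcast/multicast and unicast address filters
 * have been applied.
 */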
 802static int
 803receive_filter(E1000State *s, const uint8_t *buf, int size)
 804{
 805    uint32_t rctl = s->mac_reg[RCTL];
 806    int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
 807
 808    if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
 809        e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
 810        uint16_t vid = lduw_be_p(buf + 14);
 811        uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
 812                                 ((vid >> 5) & 0x7f));
 813        if ((vfta & (1 << (vid & 0x1f))) == 0)
 814            return 0;
 815    }
 816
 817    if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
 818        return 1;
 819    }
 820
 821    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
 822        e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
 823        return 1;
 824    }
 825
 826    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
 827        e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
 828        return 1;
 829    }
 830
 831    return e1000x_rx_group_filter(s->mac_reg, buf);
 832}
 833
 834static void
 835e1000_set_link_status(NetClientState *nc)
 836{
 837    E1000State *s = qemu_get_nic_opaque(nc);
 838    uint32_t old_status = s->mac_reg[STATUS];
 839
 840    if (nc->link_down) {
 841        e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
 842    } else {
 843        if (have_autoneg(s) &&
 844            !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
 845            e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
 846        } else {
 847            e1000_link_up(s);
 848        }
 849    }
 850
 851    if (s->mac_reg[STATUS] != old_status)
 852        set_ics(s, 0, E1000_ICR_LSC);
 853}
 854
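/* Return true if the receive ring has enough free descriptors to hold
 * total_size bytes. */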
 855static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
 856{
 857    int bufs;
 858    /* Fast-path short packets */
 859    if (total_size <= s->rxbuf_size) {
 860        return s->mac_reg[RDH] != s->mac_reg[RDT];
 861    }
 862    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
 863        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
 864    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
 865        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
 866            s->mac_reg[RDT] - s->mac_reg[RDH];
 867    } else {
 868        return false;
 869    }
 870    return total_size <= bufs * s->rxbuf_size;
 871}
 872
 873static bool
 874e1000_can_receive(NetClientState *nc)
 875{
 876    E1000State *s = qemu_get_nic_opaque(nc);
 877
 878    return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
 879        e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
 880}
 881
 882static uint64_t rx_desc_base(E1000State *s)
 883{
 884    uint64_t bah = s->mac_reg[RDBAH];
 885    uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
 886
 887    return (bah << 32) + bal;
 888}
 889
 890static void
 891e1000_receiver_overrun(E1000State *s, size_t size)
 892{
 893    trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
 894    e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
 895    e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
 896    set_ics(s, 0, E1000_ICS_RXO);
 897}
 898
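/*
 * Receive path: pad short frames, apply the receive filters, optionally
 * strip the VLAN tag, then copy the payload into the guest's receive
 * buffers one descriptor at a time and signal RXT0 (plus RXDMT0 when
 * the ring is running low).
 */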
 899static ssize_t
 900e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
 901{
 902    E1000State *s = qemu_get_nic_opaque(nc);
 903    PCIDevice *d = PCI_DEVICE(s);
 904    struct e1000_rx_desc desc;
 905    dma_addr_t base;
 906    unsigned int n, rdt;
 907    uint32_t rdh_start;
 908    uint16_t vlan_special = 0;
 909    uint8_t vlan_status = 0;
 910    uint8_t min_buf[MIN_BUF_SIZE];
 911    struct iovec min_iov;
 912    uint8_t *filter_buf = iov->iov_base;
 913    size_t size = iov_size(iov, iovcnt);
 914    size_t iov_ofs = 0;
 915    size_t desc_offset;
 916    size_t desc_size;
 917    size_t total_size;
 918
 919    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
 920        return -1;
 921    }
 922
 923    if (timer_pending(s->flush_queue_timer)) {
 924        return 0;
 925    }
 926
 927    /* Pad to minimum Ethernet frame length */
 928    if (size < sizeof(min_buf)) {
 929        iov_to_buf(iov, iovcnt, 0, min_buf, size);
 930        memset(&min_buf[size], 0, sizeof(min_buf) - size);
 931        min_iov.iov_base = filter_buf = min_buf;
 932        min_iov.iov_len = size = sizeof(min_buf);
 933        iovcnt = 1;
 934        iov = &min_iov;
 935    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
 936        /* This is very unlikely, but may happen. */
 937        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
 938        filter_buf = min_buf;
 939    }
 940
 941    /* Discard oversized packets if !LPE and !SBP. */
 942    if (e1000x_is_oversized(s->mac_reg, size)) {
 943        return size;
 944    }
 945
 946    if (!receive_filter(s, filter_buf, size)) {
 947        return size;
 948    }
 949
 950    if (e1000x_vlan_enabled(s->mac_reg) &&
 951        e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
 952        vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
 953        iov_ofs = 4;
 954        if (filter_buf == iov->iov_base) {
 955            memmove(filter_buf + 4, filter_buf, 12);
 956        } else {
 957            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
 958            while (iov->iov_len <= iov_ofs) {
 959                iov_ofs -= iov->iov_len;
 960                iov++;
 961            }
 962        }
 963        vlan_status = E1000_RXD_STAT_VP;
 964        size -= 4;
 965    }
 966
 967    rdh_start = s->mac_reg[RDH];
 968    desc_offset = 0;
 969    total_size = size + e1000x_fcs_len(s->mac_reg);
 970    if (!e1000_has_rxbufs(s, total_size)) {
 971        e1000_receiver_overrun(s, total_size);
 972        return -1;
 973    }
 974    do {
 975        desc_size = total_size - desc_offset;
 976        if (desc_size > s->rxbuf_size) {
 977            desc_size = s->rxbuf_size;
 978        }
 979        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
 980        pci_dma_read(d, base, &desc, sizeof(desc));
 981        desc.special = vlan_special;
 982        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
 983        if (desc.buffer_addr) {
 984            if (desc_offset < size) {
 985                size_t iov_copy;
 986                hwaddr ba = le64_to_cpu(desc.buffer_addr);
 987                size_t copy_size = size - desc_offset;
 988                if (copy_size > s->rxbuf_size) {
 989                    copy_size = s->rxbuf_size;
 990                }
 991                do {
 992                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
 993                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
 994                    copy_size -= iov_copy;
 995                    ba += iov_copy;
 996                    iov_ofs += iov_copy;
 997                    if (iov_ofs == iov->iov_len) {
 998                        iov++;
 999                        iov_ofs = 0;
1000                    }
1001                } while (copy_size);
1002            }
1003            desc_offset += desc_size;
1004            desc.length = cpu_to_le16(desc_size);
1005            if (desc_offset >= total_size) {
1006                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1007            } else {
1008                /* Guest zeroing out status is not a hardware requirement.
1009                   Clear EOP in case guest didn't do it. */
1010                desc.status &= ~E1000_RXD_STAT_EOP;
1011            }
1012        } else { // as per intel docs; skip descriptors with null buf addr
1013            DBGOUT(RX, "Null RX descriptor!!\n");
1014        }
1015        pci_dma_write(d, base, &desc, sizeof(desc));
1016
1017        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1018            s->mac_reg[RDH] = 0;
1019        /* see comment in start_xmit; same here */
1020        if (s->mac_reg[RDH] == rdh_start ||
1021            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
1022            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1023                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1024            e1000_receiver_overrun(s, total_size);
1025            return -1;
1026        }
1027    } while (desc_offset < total_size);
1028
1029    e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
1030
1031    n = E1000_ICS_RXT0;
1032    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1033        rdt += s->mac_reg[RDLEN] / sizeof(desc);
1034    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1035        s->rxbuf_min_shift)
1036        n |= E1000_ICS_RXDMT0;
1037
1038    set_ics(s, 0, n);
1039
1040    return size;
1041}
1042
1043static ssize_t
1044e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1045{
1046    const struct iovec iov = {
1047        .iov_base = (uint8_t *)buf,
1048        .iov_len = size
1049    };
1050
1051    return e1000_receive_iov(nc, &iov, 1);
1052}
1053
1054static uint32_t
1055mac_readreg(E1000State *s, int index)
1056{
1057    return s->mac_reg[index];
1058}
1059
1060static uint32_t
1061mac_low4_read(E1000State *s, int index)
1062{
1063    return s->mac_reg[index] & 0xf;
1064}
1065
1066static uint32_t
1067mac_low11_read(E1000State *s, int index)
1068{
1069    return s->mac_reg[index] & 0x7ff;
1070}
1071
1072static uint32_t
1073mac_low13_read(E1000State *s, int index)
1074{
1075    return s->mac_reg[index] & 0x1fff;
1076}
1077
1078static uint32_t
1079mac_low16_read(E1000State *s, int index)
1080{
1081    return s->mac_reg[index] & 0xffff;
1082}
1083
1084static uint32_t
1085mac_icr_read(E1000State *s, int index)
1086{
1087    uint32_t ret = s->mac_reg[ICR];
1088
1089    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1090    set_interrupt_cause(s, 0, 0);
1091    return ret;
1092}
1093
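/* Statistics registers are clear-on-read; the _clr8 variant also clears
 * the low half of a 64-bit counter pair. */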
1094static uint32_t
1095mac_read_clr4(E1000State *s, int index)
1096{
1097    uint32_t ret = s->mac_reg[index];
1098
1099    s->mac_reg[index] = 0;
1100    return ret;
1101}
1102
1103static uint32_t
1104mac_read_clr8(E1000State *s, int index)
1105{
1106    uint32_t ret = s->mac_reg[index];
1107
1108    s->mac_reg[index] = 0;
1109    s->mac_reg[index-1] = 0;
1110    return ret;
1111}
1112
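/* Default register write; writing the high dword of Receive Address 0
 * also refreshes the NIC info string with the new MAC address. */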
1113static void
1114mac_writereg(E1000State *s, int index, uint32_t val)
1115{
1116    uint32_t macaddr[2];
1117
1118    s->mac_reg[index] = val;
1119
1120    if (index == RA + 1) {
1121        macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1122        macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1123        qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1124    }
1125}
1126
1127static void
1128set_rdt(E1000State *s, int index, uint32_t val)
1129{
1130    s->mac_reg[index] = val & 0xffff;
1131    if (e1000_has_rxbufs(s, 1)) {
1132        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1133    }
1134}
1135
1136static void
1137set_16bit(E1000State *s, int index, uint32_t val)
1138{
1139    s->mac_reg[index] = val & 0xffff;
1140}
1141
1142static void
1143set_dlen(E1000State *s, int index, uint32_t val)
1144{
1145    s->mac_reg[index] = val & 0xfff80;
1146}
1147
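/* Shared handler for TCTL and TDT writes; either write kicks the
 * transmit path. */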
1148static void
1149set_tctl(E1000State *s, int index, uint32_t val)
1150{
1151    s->mac_reg[index] = val;
1152    s->mac_reg[TDT] &= 0xffff;
1153    start_xmit(s);
1154}
1155
1156static void
1157set_icr(E1000State *s, int index, uint32_t val)
1158{
1159    DBGOUT(INTERRUPT, "set_icr %x\n", val);
1160    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1161}
1162
1163static void
1164set_imc(E1000State *s, int index, uint32_t val)
1165{
1166    s->mac_reg[IMS] &= ~val;
1167    set_ics(s, 0, 0);
1168}
1169
1170static void
1171set_ims(E1000State *s, int index, uint32_t val)
1172{
1173    s->mac_reg[IMS] |= val;
1174    set_ics(s, 0, 0);
1175}
1176
1177#define getreg(x)    [x] = mac_readreg
1178typedef uint32_t (*readops)(E1000State *, int);
1179static const readops macreg_readops[] = {
1180    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1181    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1182    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1183    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1184    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1185    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1186    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1187    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1188    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1189    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1190    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
1191    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1192    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1193    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
1194    getreg(GOTCL),
1195
1196    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1197    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
1198    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
1199    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
1200    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
1201    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
1202    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
1203    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
1204    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1205    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1206    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
1207    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
1208    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
1209    [MPTC]    = mac_read_clr4,
1210    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1211    [EERD]    = flash_eerd_read,
1212    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
1213    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
1214    [RDFPC]   = mac_low13_read,
1215    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
1216    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
1217    [TDFPC]   = mac_low13_read,
1218    [AIT]     = mac_low16_read,
1219
1220    [CRCERRS ... MPC]   = &mac_readreg,
1221    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
1222    [FFLT ... FFLT+6]   = &mac_low11_read,
1223    [RA ... RA+31]      = &mac_readreg,
1224    [WUPM ... WUPM+31]  = &mac_readreg,
1225    [MTA ... MTA+127]   = &mac_readreg,
1226    [VFTA ... VFTA+127] = &mac_readreg,
1227    [FFMT ... FFMT+254] = &mac_low4_read,
1228    [FFVT ... FFVT+254] = &mac_readreg,
1229    [PBM ... PBM+16383] = &mac_readreg,
1230};
1231enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1232
1233#define putreg(x)    [x] = mac_writereg
1234typedef void (*writeops)(E1000State *, int, uint32_t);
1235static const writeops macreg_writeops[] = {
1236    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1237    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1238    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1239    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
1240    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
1241    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
1242    putreg(WUS),      putreg(AIT),
1243
1244    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
1245    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
1246    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
1247    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
1248    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
1249    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
1250    [ITR]    = set_16bit,
1251
1252    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
1253    [FFLT ... FFLT+6]   = &mac_writereg,
1254    [RA ... RA+31]      = &mac_writereg,
1255    [WUPM ... WUPM+31]  = &mac_writereg,
1256    [MTA ... MTA+127]   = &mac_writereg,
1257    [VFTA ... VFTA+127] = &mac_writereg,
1258    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
1259    [PBM ... PBM+16383] = &mac_writereg,
1260};
1261
1262enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1263
1264enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1265
1266#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
1267/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1268 * f - flag bits (up to 6 possible flags)
1269 * n - flag needed
 1270 * p - partially implemented */
1271static const uint8_t mac_reg_access[0x8000] = {
1272    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
1273    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),
1274
1275    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
1276    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
1277    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
1278    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
1279    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
1280    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
1281    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
1282    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
1283    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
1284    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
1285    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
1286    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
1287    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
1288    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
1289    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
1290    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
1291    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
1292    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
1293    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
1294    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
1295    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
1296    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
1297    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
1298    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
1299    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
1300    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
1301    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
1302    [BPTC]    = markflag(MAC),
1303
1304    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1305    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1306    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1307    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1308    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1309    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1310    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1311    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1312    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1313    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1314    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
1315};
1316
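/*
 * MMIO accesses are dispatched through macreg_writeops[] and
 * macreg_readops[]; registers gated behind a compatibility flag that is
 * not enabled are ignored, and accesses to partially implemented
 * registers are reported under E1000_DEBUG.
 */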
1317static void
1318e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1319                 unsigned size)
1320{
1321    E1000State *s = opaque;
1322    unsigned int index = (addr & 0x1ffff) >> 2;
1323
1324    if (index < NWRITEOPS && macreg_writeops[index]) {
1325        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1326            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1327            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1328                DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1329                       "It is not fully implemented.\n", index<<2);
1330            }
1331            macreg_writeops[index](s, index, val);
1332        } else {    /* "flag needed" bit is set, but the flag is not active */
1333            DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1334                   index<<2);
1335        }
1336    } else if (index < NREADOPS && macreg_readops[index]) {
1337        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1338               index<<2, val);
1339    } else {
1340        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1341               index<<2, val);
1342    }
1343}
1344
1345static uint64_t
1346e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1347{
1348    E1000State *s = opaque;
1349    unsigned int index = (addr & 0x1ffff) >> 2;
1350
1351    if (index < NREADOPS && macreg_readops[index]) {
1352        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1353            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1354            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1355                DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1356                       "It is not fully implemented.\n", index<<2);
1357            }
1358            return macreg_readops[index](s, index);
1359        } else {    /* "flag needed" bit is set, but the flag is not active */
1360            DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1361                   index<<2);
1362        }
1363    } else {
1364        DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1365    }
1366    return 0;
1367}
1368
1369static const MemoryRegionOps e1000_mmio_ops = {
1370    .read = e1000_mmio_read,
1371    .write = e1000_mmio_write,
1372    .endianness = DEVICE_LITTLE_ENDIAN,
1373    .impl = {
1374        .min_access_size = 4,
1375        .max_access_size = 4,
1376    },
1377};
1378
1379static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1380                              unsigned size)
1381{
1382    E1000State *s = opaque;
1383
1384    (void)s;
1385    return 0;
1386}
1387
1388static void e1000_io_write(void *opaque, hwaddr addr,
1389                           uint64_t val, unsigned size)
1390{
1391    E1000State *s = opaque;
1392
1393    (void)s;
1394}
1395
1396static const MemoryRegionOps e1000_io_ops = {
1397    .read = e1000_io_read,
1398    .write = e1000_io_write,
1399    .endianness = DEVICE_LITTLE_ENDIAN,
1400};
1401
1402static bool is_version_1(void *opaque, int version_id)
1403{
1404    return version_id == 1;
1405}
1406
1407static int e1000_pre_save(void *opaque)
1408{
1409    E1000State *s = opaque;
1410    NetClientState *nc = qemu_get_queue(s->nic);
1411
1412    /*
1413     * If link is down and auto-negotiation is supported and ongoing,
1414     * complete auto-negotiation immediately. This allows us to look
1415     * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1416     */
1417    if (nc->link_down && have_autoneg(s)) {
1418        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1419    }
1420
1421    /* Decide which set of props to migrate in the main structure */
1422    if (chkflag(TSO) || !s->use_tso_for_migration) {
 1423        /* Either we're migrating with the extra subsection, in which
 1424         * case mig_props is always 'props', or
 1425         * we don't have the subsection and 'props' was the last one
 1426         * updated.
1427         */
1428        s->mig_props = s->tx.props;
1429    } else {
1430        /* We're not using the subsection, and 'tso_props' was
1431         * the last updated.
1432         */
1433        s->mig_props = s->tx.tso_props;
1434    }
1435    return 0;
1436}
1437
1438static int e1000_post_load(void *opaque, int version_id)
1439{
1440    E1000State *s = opaque;
1441    NetClientState *nc = qemu_get_queue(s->nic);
1442
1443    if (!chkflag(MIT)) {
1444        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1445            s->mac_reg[TADV] = 0;
1446        s->mit_irq_level = false;
1447    }
1448    s->mit_ide = 0;
1449    s->mit_timer_on = true;
1450    timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1451
1452    /* nc.link_down can't be migrated, so infer link_down according
1453     * to link status bit in mac_reg[STATUS].
1454     * Alternatively, restart link negotiation if it was in progress. */
1455    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1456
1457    if (have_autoneg(s) &&
1458        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1459        nc->link_down = false;
1460        timer_mod(s->autoneg_timer,
1461                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1462    }
1463
1464    s->tx.props = s->mig_props;
1465    if (!s->received_tx_tso) {
1466        /* We received only one set of offload data (tx.props)
1467         * and haven't got tx.tso_props.  The best we can do
1468         * is dupe the data.
1469         */
1470        s->tx.tso_props = s->mig_props;
1471    }
1472    return 0;
1473}
1474
1475static int e1000_tx_tso_post_load(void *opaque, int version_id)
1476{
1477    E1000State *s = opaque;
1478    s->received_tx_tso = true;
1479    return 0;
1480}
1481
1482static bool e1000_mit_state_needed(void *opaque)
1483{
1484    E1000State *s = opaque;
1485
1486    return chkflag(MIT);
1487}
1488
1489static bool e1000_full_mac_needed(void *opaque)
1490{
1491    E1000State *s = opaque;
1492
1493    return chkflag(MAC);
1494}
1495
1496static bool e1000_tso_state_needed(void *opaque)
1497{
1498    E1000State *s = opaque;
1499
1500    return chkflag(TSO);
1501}
1502
1503static const VMStateDescription vmstate_e1000_mit_state = {
1504    .name = "e1000/mit_state",
1505    .version_id = 1,
1506    .minimum_version_id = 1,
1507    .needed = e1000_mit_state_needed,
1508    .fields = (VMStateField[]) {
1509        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1510        VMSTATE_UINT32(mac_reg[RADV], E1000State),
1511        VMSTATE_UINT32(mac_reg[TADV], E1000State),
1512        VMSTATE_UINT32(mac_reg[ITR], E1000State),
1513        VMSTATE_BOOL(mit_irq_level, E1000State),
1514        VMSTATE_END_OF_LIST()
1515    }
1516};
1517
1518static const VMStateDescription vmstate_e1000_full_mac_state = {
1519    .name = "e1000/full_mac_state",
1520    .version_id = 1,
1521    .minimum_version_id = 1,
1522    .needed = e1000_full_mac_needed,
1523    .fields = (VMStateField[]) {
1524        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
1525        VMSTATE_END_OF_LIST()
1526    }
1527};
1528
1529static const VMStateDescription vmstate_e1000_tx_tso_state = {
1530    .name = "e1000/tx_tso_state",
1531    .version_id = 1,
1532    .minimum_version_id = 1,
1533    .needed = e1000_tso_state_needed,
1534    .post_load = e1000_tx_tso_post_load,
1535    .fields = (VMStateField[]) {
1536        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
1537        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
1538        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
1539        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
1540        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
1541        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
1542        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
1543        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
1544        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
1545        VMSTATE_INT8(tx.tso_props.ip, E1000State),
1546        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
1547        VMSTATE_END_OF_LIST()
1548    }
1549};
1550
1551static const VMStateDescription vmstate_e1000 = {
1552    .name = "e1000",
1553    .version_id = 2,
1554    .minimum_version_id = 1,
1555    .pre_save = e1000_pre_save,
1556    .post_load = e1000_post_load,
1557    .fields = (VMStateField[]) {
1558        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1559        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1560        VMSTATE_UNUSED(4), /* Was mmio_base.  */
1561        VMSTATE_UINT32(rxbuf_size, E1000State),
1562        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1563        VMSTATE_UINT32(eecd_state.val_in, E1000State),
1564        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1565        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1566        VMSTATE_UINT16(eecd_state.reading, E1000State),
1567        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1568        VMSTATE_UINT8(mig_props.ipcss, E1000State),
1569        VMSTATE_UINT8(mig_props.ipcso, E1000State),
1570        VMSTATE_UINT16(mig_props.ipcse, E1000State),
1571        VMSTATE_UINT8(mig_props.tucss, E1000State),
1572        VMSTATE_UINT8(mig_props.tucso, E1000State),
1573        VMSTATE_UINT16(mig_props.tucse, E1000State),
1574        VMSTATE_UINT32(mig_props.paylen, E1000State),
1575        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
1576        VMSTATE_UINT16(mig_props.mss, E1000State),
1577        VMSTATE_UINT16(tx.size, E1000State),
1578        VMSTATE_UINT16(tx.tso_frames, E1000State),
1579        VMSTATE_UINT8(tx.sum_needed, E1000State),
1580        VMSTATE_INT8(mig_props.ip, E1000State),
1581        VMSTATE_INT8(mig_props.tcp, E1000State),
1582        VMSTATE_BUFFER(tx.header, E1000State),
1583        VMSTATE_BUFFER(tx.data, E1000State),
1584        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1585        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1586        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1587        VMSTATE_UINT32(mac_reg[EECD], E1000State),
1588        VMSTATE_UINT32(mac_reg[EERD], E1000State),
1589        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1590        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1591        VMSTATE_UINT32(mac_reg[ICR], E1000State),
1592        VMSTATE_UINT32(mac_reg[ICS], E1000State),
1593        VMSTATE_UINT32(mac_reg[IMC], E1000State),
1594        VMSTATE_UINT32(mac_reg[IMS], E1000State),
1595        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1596        VMSTATE_UINT32(mac_reg[MANC], E1000State),
1597        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1598        VMSTATE_UINT32(mac_reg[MPC], E1000State),
1599        VMSTATE_UINT32(mac_reg[PBA], E1000State),
1600        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1601        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1602        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1603        VMSTATE_UINT32(mac_reg[RDH], E1000State),
1604        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1605        VMSTATE_UINT32(mac_reg[RDT], E1000State),
1606        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1607        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1608        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1609        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1610        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1611        VMSTATE_UINT32(mac_reg[TDH], E1000State),
1612        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1613        VMSTATE_UINT32(mac_reg[TDT], E1000State),
1614        VMSTATE_UINT32(mac_reg[TORH], E1000State),
1615        VMSTATE_UINT32(mac_reg[TORL], E1000State),
1616        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1617        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1618        VMSTATE_UINT32(mac_reg[TPR], E1000State),
1619        VMSTATE_UINT32(mac_reg[TPT], E1000State),
1620        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1621        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1622        VMSTATE_UINT32(mac_reg[VET], E1000State),
1623        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1624        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1625        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1626        VMSTATE_END_OF_LIST()
1627    },
1628    .subsections = (const VMStateDescription*[]) {
1629        &vmstate_e1000_mit_state,
1630        &vmstate_e1000_full_mac_state,
1631        &vmstate_e1000_tx_tso_state,
1632        NULL
1633    }
1634};
1635
1636/*
1637 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1638 * Note: A valid DevId will be inserted during pci_e1000_realize().
1639 */
1640static const uint16_t e1000_eeprom_template[64] = {
1641    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1642    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1643    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1644    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1645    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1646    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1647    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1648    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1649};
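/*
 * At realize time e1000x_core_prepare_eeprom() (e1000x_common.c) finalizes
 * this image: words 0x00-0x02 receive the MAC address, the two DevId
 * placeholders (words 0x0B and 0x0D) receive the PCI device ID of the
 * variant being instantiated, and the final word is set to the checksum
 * that makes the 16-bit sum of all 64 words equal the value drivers expect
 * (0xBABA).
 */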
1650
1651/* PCI interface */
1652
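/*
 * Map the 128 KiB register BAR with coalesced MMIO enabled everywhere
 * except for the registers listed in excluded_regs: writes to those
 * (MDIC, the interrupt cause/mask registers, TCTL and TDT) have immediate
 * side effects such as raising interrupts or kicking off transmission, so
 * they must reach the device right away rather than being batched by the
 * accelerator (e.g. KVM's coalesced-MMIO buffer).
 */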
1653static void
1654e1000_mmio_setup(E1000State *d)
1655{
1656    int i;
1657    const uint32_t excluded_regs[] = {
1658        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1659        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1660    };
1661
1662    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1663                          "e1000-mmio", PNPMMIO_SIZE);
1664    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1665    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1666        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1667                                     excluded_regs[i+1] - excluded_regs[i] - 4);
1668    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1669}
1670
1671static void
1672pci_e1000_uninit(PCIDevice *dev)
1673{
1674    E1000State *d = E1000(dev);
1675
1676    timer_free(d->autoneg_timer);
1677    timer_free(d->mit_timer);
1678    timer_free(d->flush_queue_timer);
1679    qemu_del_nic(d->nic);
1680}
1681
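/*
 * Callbacks invoked by the net core for the peer backend.  The rx-path
 * handlers and e1000_set_link_status() are defined earlier in this file;
 * the latter mirrors the backend's link state into the STATUS and PHY
 * registers and raises a link-status-change interrupt when it changes.
 */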
1682static NetClientInfo net_e1000_info = {
1683    .type = NET_CLIENT_DRIVER_NIC,
1684    .size = sizeof(NICState),
1685    .can_receive = e1000_can_receive,
1686    .receive = e1000_receive,
1687    .receive_iov = e1000_receive_iov,
1688    .link_status_changed = e1000_set_link_status,
1689};
1690
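/*
 * Config-space write hook.  While bus mastering is disabled the device
 * cannot DMA incoming frames to the rx ring and they pile up in the
 * backend's queue, so flush that queue as soon as a write to the command
 * register turns PCI_COMMAND_MASTER on.
 */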
1691static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1692                                uint32_t val, int len)
1693{
1694    E1000State *s = E1000(pci_dev);
1695
1696    pci_default_write_config(pci_dev, address, val, len);
1697
1698    if (range_covers_byte(address, len, PCI_COMMAND) &&
1699        (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1700        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1701    }
1702}
1703
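/*
 * realize: set up config-space defaults, the MMIO and I/O BARs, the EEPROM
 * image (template above plus MAC address, device ID and checksum), the NIC
 * backend, and the three QEMU_CLOCK_VIRTUAL timers (autonegotiation,
 * interrupt mitigation, deferred rx queue flushing) used by the data path.
 */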
1704static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1705{
1706    DeviceState *dev = DEVICE(pci_dev);
1707    E1000State *d = E1000(pci_dev);
1708    uint8_t *pci_conf;
1709    uint8_t *macaddr;
1710
1711    pci_dev->config_write = e1000_write_config;
1712
1713    pci_conf = pci_dev->config;
1714
1715    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1716    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1717
1718    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1719
1720    e1000_mmio_setup(d);
1721
1722    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1723
1724    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1725
1726    qemu_macaddr_default_if_unset(&d->conf.macaddr);
1727    macaddr = d->conf.macaddr.a;
1728
1729    e1000x_core_prepare_eeprom(d->eeprom_data,
1730                               e1000_eeprom_template,
1731                               sizeof(e1000_eeprom_template),
1732                               PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1733                               macaddr);
1734
1735    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1736                          object_get_typename(OBJECT(d)), dev->id, d);
1737
1738    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1739
1740    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1741    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1742    d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1743                                        e1000_flush_queue_timer, d);
1744}
1745
1746static void qdev_e1000_reset(DeviceState *dev)
1747{
1748    E1000State *d = E1000(dev);
1749    e1000_reset(d);
1750}
1751
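/*
 * The compat_flags bits behind these properties default to enabled; older
 * machine types are expected to turn them off through their compat
 * properties so that guest-visible behaviour and the migration stream
 * format do not change under an existing guest.
 */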
1752static Property e1000_properties[] = {
1753    DEFINE_NIC_PROPERTIES(E1000State, conf),
1754    DEFINE_PROP_BIT("autonegotiation", E1000State,
1755                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1756    DEFINE_PROP_BIT("mitigation", E1000State,
1757                    compat_flags, E1000_FLAG_MIT_BIT, true),
1758    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1759                    compat_flags, E1000_FLAG_MAC_BIT, true),
1760    DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1761                    compat_flags, E1000_FLAG_TSO_BIT, true),
1762    DEFINE_PROP_BIT("init-vet", E1000State,
1763                    compat_flags, E1000_FLAG_VET_BIT, true),
1764    DEFINE_PROP_END_OF_LIST(),
1765};
1766
1767typedef struct E1000Info {
1768    const char *name;
1769    uint16_t   device_id;
1770    uint8_t    revision;
1771    uint16_t   phy_id2;
1772} E1000Info;
1773
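/*
 * Class initializer shared by every variant in e1000_devices[] below: the
 * per-variant E1000Info arrives as class_data and supplies the PCI device
 * ID, revision and PHY ID2; everything else is common to all models.
 */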
1774static void e1000_class_init(ObjectClass *klass, void *data)
1775{
1776    DeviceClass *dc = DEVICE_CLASS(klass);
1777    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1778    E1000BaseClass *e = E1000_CLASS(klass);
1779    const E1000Info *info = data;
1780
1781    k->realize = pci_e1000_realize;
1782    k->exit = pci_e1000_uninit;
1783    k->romfile = "efi-e1000.rom";
1784    k->vendor_id = PCI_VENDOR_ID_INTEL;
1785    k->device_id = info->device_id;
1786    k->revision = info->revision;
1787    e->phy_id2 = info->phy_id2;
1788    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1789    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1790    dc->desc = "Intel Gigabit Ethernet";
1791    dc->reset = qdev_e1000_reset;
1792    dc->vmsd = &vmstate_e1000;
1793    device_class_set_props(dc, e1000_properties);
1794}
1795
1796static void e1000_instance_init(Object *obj)
1797{
1798    E1000State *n = E1000(obj);
1799    device_add_bootindex_property(obj, &n->conf.bootindex,
1800                                  "bootindex", "/ethernet-phy@0",
1801                                  DEVICE(n));
1802}
1803
1804static const TypeInfo e1000_base_info = {
1805    .name          = TYPE_E1000_BASE,
1806    .parent        = TYPE_PCI_DEVICE,
1807    .instance_size = sizeof(E1000State),
1808    .instance_init = e1000_instance_init,
1809    .class_size    = sizeof(E1000BaseClass),
1810    .abstract      = true,
1811    .interfaces = (InterfaceInfo[]) {
1812        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1813        { },
1814    },
1815};
1816
1817static const E1000Info e1000_devices[] = {
1818    {
1819        .name      = "e1000",
1820        .device_id = E1000_DEV_ID_82540EM,
1821        .revision  = 0x03,
1822        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1823    },
1824    {
1825        .name      = "e1000-82544gc",
1826        .device_id = E1000_DEV_ID_82544GC_COPPER,
1827        .revision  = 0x03,
1828        .phy_id2   = E1000_PHY_ID2_82544x,
1829    },
1830    {
1831        .name      = "e1000-82545em",
1832        .device_id = E1000_DEV_ID_82545EM_COPPER,
1833        .revision  = 0x03,
1834        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1835    },
1836};
1837
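/* Register the abstract base type and one concrete type per e1000_devices[]
 * entry. */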
1838static void e1000_register_types(void)
1839{
1840    int i;
1841
1842    type_register_static(&e1000_base_info);
1843    for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1844        const E1000Info *info = &e1000_devices[i];
1845        TypeInfo type_info = {};
1846
1847        type_info.name = info->name;
1848        type_info.parent = TYPE_E1000_BASE;
1849        type_info.class_data = (void *)info;
1850        type_info.class_init = e1000_class_init;
1851
1852        type_register(&type_info);
1853    }
1854}
1855
1856type_init(e1000_register_types)
1857