/* qemu/hw/net/e1000.c */
   1/*
   2 * QEMU e1000 emulation
   3 *
   4 * Software developer's manual:
   5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
   6 *
   7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
   8 * Copyright (c) 2008 Qumranet
   9 * Based on work done by:
  10 * Copyright (c) 2007 Dan Aloni
  11 * Copyright (c) 2004 Antony T Curtis
  12 *
  13 * This library is free software; you can redistribute it and/or
  14 * modify it under the terms of the GNU Lesser General Public
  15 * License as published by the Free Software Foundation; either
  16 * version 2.1 of the License, or (at your option) any later version.
  17 *
  18 * This library is distributed in the hope that it will be useful,
  19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 * Lesser General Public License for more details.
  22 *
  23 * You should have received a copy of the GNU Lesser General Public
  24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  25 */
  26
  27
  28#include "qemu/osdep.h"
  29#include "hw/net/mii.h"
  30#include "hw/pci/pci_device.h"
  31#include "hw/qdev-properties.h"
  32#include "migration/vmstate.h"
  33#include "net/eth.h"
  34#include "net/net.h"
  35#include "net/checksum.h"
  36#include "sysemu/sysemu.h"
  37#include "sysemu/dma.h"
  38#include "qemu/iov.h"
  39#include "qemu/module.h"
  40#include "qemu/range.h"
  41
  42#include "e1000_common.h"
  43#include "e1000x_common.h"
  44#include "trace.h"
  45#include "qom/object.h"
  46
  47/* #define E1000_DEBUG */
  48
  49#ifdef E1000_DEBUG
  50enum {
  51    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
  52    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
  53    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
  54    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
  55};
  56#define DBGBIT(x)    (1<<DEBUG_##x)
  57static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
  58
  59#define DBGOUT(what, fmt, ...) do { \
  60    if (debugflags & DBGBIT(what)) \
  61        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
  62    } while (0)
  63#else
  64#define DBGOUT(what, fmt, ...) do {} while (0)
  65#endif
  66
  67#define IOPORT_SIZE       0x40
  68#define PNPMMIO_SIZE      0x20000
  69
  70#define MAXIMUM_ETHERNET_HDR_LEN (ETH_HLEN + 4)
  71
  72/*
  73 * HW models:
  74 *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
  75 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
  76 *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
  77 *  Others never tested
  78 */
  79
/* Per-device state for the emulated e1000 NIC. */
struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;          /* memory-mapped register window */
    MemoryRegion io;            /* legacy I/O port window */

    uint32_t mac_reg[0x8000];   /* MAC register file, indexed by register id */
    uint16_t phy_reg[0x20];     /* MII PHY registers */
    uint16_t eeprom_data[64];   /* EEPROM contents, read bit-serially */

    uint32_t rxbuf_size;
    uint32_t rxbuf_min_shift;
    struct e1000_tx {
        unsigned char header[256];      /* copy of TSO headers for resegmenting */
        unsigned char vlan_header[4];   /* 802.1Q tag to reinsert on transmit */
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;                  /* bytes accumulated in data[] */
        unsigned char vlan_needed;
        unsigned char sum_needed;
        bool cptse;
        e1000x_txd_props props;         /* latched non-TSO context descriptor */
        e1000x_txd_props tso_props;     /* latched TSO context descriptor */
        uint16_t tso_frames;            /* segments emitted so far */
        bool busy;                      /* transmit loop in progress */
    } tx;

    struct {
        uint32_t val_in;    /* shifted in from guest driver */
        uint16_t bitnum_in;
        uint16_t bitnum_out;
        uint16_t reading;
        uint32_t old_eecd;
    } eecd_state;

    QEMUTimer *autoneg_timer;

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

    QEMUTimer *flush_queue_timer;

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_TSO_BIT 3
#define E1000_FLAG_VET_BIT 4
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
#define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)

    uint32_t compat_flags;
    bool received_tx_tso;
    bool use_tso_for_migration;
    e1000x_txd_props mig_props;
};
typedef struct E1000State_st E1000State;
 147
/* Test a migration-compatibility flag; expects an E1000State *s in scope. */
#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)

/* Class data: only the PHY ID2 value differs between emulated models. */
struct E1000BaseClass {
    PCIDeviceClass parent_class;
    uint16_t phy_id2;
};
typedef struct E1000BaseClass E1000BaseClass;

#define TYPE_E1000_BASE "e1000-base"

DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
                     E1000, TYPE_E1000_BASE)
 160
 161
 162static void
 163e1000_link_up(E1000State *s)
 164{
 165    e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
 166
 167    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 168    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 169}
 170
 171static void
 172e1000_autoneg_done(E1000State *s)
 173{
 174    e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
 175
 176    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 177    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 178}
 179
 180static bool
 181have_autoneg(E1000State *s)
 182{
 183    return chkflag(AUTONEG) && (s->phy_reg[MII_BMCR] & MII_BMCR_AUTOEN);
 184}
 185
 186static void
 187set_phy_ctrl(E1000State *s, int index, uint16_t val)
 188{
 189    /* bits 0-5 reserved; MII_BMCR_[ANRESTART,RESET] are self clearing */
 190    s->phy_reg[MII_BMCR] = val & ~(0x3f |
 191                                   MII_BMCR_RESET |
 192                                   MII_BMCR_ANRESTART);
 193
 194    /*
 195     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
 196     * migrate during auto negotiation, after migration the link will be
 197     * down.
 198     */
 199    if (have_autoneg(s) && (val & MII_BMCR_ANRESTART)) {
 200        e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
 201    }
 202}
 203
/* Per-register write hooks; registers without an entry are plain storage. */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [MII_BMCR] = set_phy_ctrl,
};

enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };

/* Guest access permission for each PHY register, enforced by set_mdic(). */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [MII_BMSR]   = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [MII_PHYID1] = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
    [MII_BMCR]   = PHY_RW,    [MII_CTRL1000]               = PHY_RW,
    [MII_ANLPAR] = PHY_R,     [MII_STAT1000]               = PHY_R,
    [MII_ANAR]   = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
    [MII_PHYID2] = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
    [MII_ANER]   = PHY_R,
};
 220
/* PHY register reset values.  MII_PHYID2 documented in 8254x_GBe_SDM.pdf,
 * pp. 250; it is filled in per device model from e1000_reset(). */
static const uint16_t phy_reg_init[] = {
    [MII_BMCR] = MII_BMCR_SPEED1000 |
                 MII_BMCR_FD |
                 MII_BMCR_AUTOEN,

    [MII_BMSR] = MII_BMSR_EXTCAP |
                 MII_BMSR_LINK_ST |   /* link initially up */
                 MII_BMSR_AUTONEG |
                 /* MII_BMSR_AN_COMP: initially NOT completed */
                 MII_BMSR_MFPS |
                 MII_BMSR_EXTSTAT |
                 MII_BMSR_10T_HD |
                 MII_BMSR_10T_FD |
                 MII_BMSR_100TX_HD |
                 MII_BMSR_100TX_FD,

    [MII_PHYID1] = 0x141,
    /* [MII_PHYID2] configured per DevId, from e1000_reset() */
    [MII_ANAR] = MII_ANAR_CSMACD | MII_ANAR_10 |
                 MII_ANAR_10FD | MII_ANAR_TX |
                 MII_ANAR_TXFD | MII_ANAR_PAUSE |
                 MII_ANAR_PAUSE_ASYM,
    [MII_ANLPAR] = MII_ANLPAR_10 | MII_ANLPAR_10FD |
                   MII_ANLPAR_TX | MII_ANLPAR_TXFD,
    [MII_CTRL1000] = MII_CTRL1000_FULL | MII_CTRL1000_PORT |
                     MII_CTRL1000_MASTER,
    [MII_STAT1000] = MII_STAT1000_HALF | MII_STAT1000_FULL |
                     MII_STAT1000_ROK | MII_STAT1000_LOK,
    [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};
 254
/* MAC register reset values; everything else starts as zero. */
static const uint32_t mac_reg_init[] = {
    [PBA]     = 0x00100030,
    [LEDCTL]  = 0x602,
    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
 268
 269/* Helper function, *curr == 0 means the value is not set */
 270static inline void
 271mit_update_delay(uint32_t *curr, uint32_t value)
 272{
 273    if (value && (*curr == 0 || value < *curr)) {
 274        *curr = value;
 275    }
 276}
 277
 278static void
 279set_interrupt_cause(E1000State *s, int index, uint32_t val)
 280{
 281    PCIDevice *d = PCI_DEVICE(s);
 282    uint32_t pending_ints;
 283    uint32_t mit_delay;
 284
 285    s->mac_reg[ICR] = val;
 286
 287    /*
 288     * Make sure ICR and ICS registers have the same value.
 289     * The spec says that the ICS register is write-only.  However in practice,
 290     * on real hardware ICS is readable, and for reads it has the same value as
 291     * ICR (except that ICS does not have the clear on read behaviour of ICR).
 292     *
 293     * The VxWorks PRO/1000 driver uses this behaviour.
 294     */
 295    s->mac_reg[ICS] = val;
 296
 297    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
 298    if (!s->mit_irq_level && pending_ints) {
 299        /*
 300         * Here we detect a potential raising edge. We postpone raising the
 301         * interrupt line if we are inside the mitigation delay window
 302         * (s->mit_timer_on == 1).
 303         * We provide a partial implementation of interrupt mitigation,
 304         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
 305         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
 306         * RADV; relative timers based on TIDV and RDTR are not implemented.
 307         */
 308        if (s->mit_timer_on) {
 309            return;
 310        }
 311        if (chkflag(MIT)) {
 312            /* Compute the next mitigation delay according to pending
 313             * interrupts and the current values of RADV (provided
 314             * RDTR!=0), TADV and ITR.
 315             * Then rearm the timer.
 316             */
 317            mit_delay = 0;
 318            if (s->mit_ide &&
 319                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
 320                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
 321            }
 322            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
 323                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
 324            }
 325            mit_update_delay(&mit_delay, s->mac_reg[ITR]);
 326
 327            /*
 328             * According to e1000 SPEC, the Ethernet controller guarantees
 329             * a maximum observable interrupt rate of 7813 interrupts/sec.
 330             * Thus if mit_delay < 500 then the delay should be set to the
 331             * minimum delay possible which is 500.
 332             */
 333            mit_delay = (mit_delay < 500) ? 500 : mit_delay;
 334
 335            s->mit_timer_on = 1;
 336            timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 337                      mit_delay * 256);
 338            s->mit_ide = 0;
 339        }
 340    }
 341
 342    s->mit_irq_level = (pending_ints != 0);
 343    pci_set_irq(d, s->mit_irq_level);
 344}
 345
 346static void
 347e1000_mit_timer(void *opaque)
 348{
 349    E1000State *s = opaque;
 350
 351    s->mit_timer_on = 0;
 352    /* Call set_interrupt_cause to update the irq level (if necessary). */
 353    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
 354}
 355
 356static void
 357set_ics(E1000State *s, int index, uint32_t val)
 358{
 359    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
 360        s->mac_reg[IMS]);
 361    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
 362}
 363
 364static void
 365e1000_autoneg_timer(void *opaque)
 366{
 367    E1000State *s = opaque;
 368    if (!qemu_get_queue(s->nic)->link_down) {
 369        e1000_autoneg_done(s);
 370        set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
 371    }
 372}
 373
 374static bool e1000_vet_init_need(void *opaque)
 375{
 376    E1000State *s = opaque;
 377
 378    return chkflag(VET);
 379}
 380
 381static void e1000_reset_hold(Object *obj)
 382{
 383    E1000State *d = E1000(obj);
 384    E1000BaseClass *edc = E1000_GET_CLASS(d);
 385    uint8_t *macaddr = d->conf.macaddr.a;
 386
 387    timer_del(d->autoneg_timer);
 388    timer_del(d->mit_timer);
 389    timer_del(d->flush_queue_timer);
 390    d->mit_timer_on = 0;
 391    d->mit_irq_level = 0;
 392    d->mit_ide = 0;
 393    memset(d->phy_reg, 0, sizeof d->phy_reg);
 394    memcpy(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
 395    d->phy_reg[MII_PHYID2] = edc->phy_id2;
 396    memset(d->mac_reg, 0, sizeof d->mac_reg);
 397    memcpy(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
 398    d->rxbuf_min_shift = 1;
 399    memset(&d->tx, 0, sizeof d->tx);
 400
 401    if (qemu_get_queue(d->nic)->link_down) {
 402        e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
 403    }
 404
 405    e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
 406
 407    if (e1000_vet_init_need(d)) {
 408        d->mac_reg[VET] = ETH_P_VLAN;
 409    }
 410}
 411
 412static void
 413set_ctrl(E1000State *s, int index, uint32_t val)
 414{
 415    /* RST is self clearing */
 416    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
 417}
 418
 419static void
 420e1000_flush_queue_timer(void *opaque)
 421{
 422    E1000State *s = opaque;
 423
 424    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 425}
 426
 427static void
 428set_rx_control(E1000State *s, int index, uint32_t val)
 429{
 430    s->mac_reg[RCTL] = val;
 431    s->rxbuf_size = e1000x_rxbufsize(val);
 432    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
 433    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
 434           s->mac_reg[RCTL]);
 435    timer_mod(s->flush_queue_timer,
 436              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
 437}
 438
 439static void
 440set_mdic(E1000State *s, int index, uint32_t val)
 441{
 442    uint32_t data = val & E1000_MDIC_DATA_MASK;
 443    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
 444
 445    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
 446        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
 447    else if (val & E1000_MDIC_OP_READ) {
 448        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
 449        if (!(phy_regcap[addr] & PHY_R)) {
 450            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
 451            val |= E1000_MDIC_ERROR;
 452        } else
 453            val = (val ^ data) | s->phy_reg[addr];
 454    } else if (val & E1000_MDIC_OP_WRITE) {
 455        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
 456        if (!(phy_regcap[addr] & PHY_W)) {
 457            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
 458            val |= E1000_MDIC_ERROR;
 459        } else {
 460            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
 461                phyreg_writeops[addr](s, index, data);
 462            } else {
 463                s->phy_reg[addr] = data;
 464            }
 465        }
 466    }
 467    s->mac_reg[MDIC] = val | E1000_MDIC_READY;
 468
 469    if (val & E1000_MDIC_INT_EN) {
 470        set_ics(s, 0, E1000_ICR_MDAC);
 471    }
 472}
 473
 474static uint32_t
 475get_eecd(E1000State *s, int index)
 476{
 477    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
 478
 479    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
 480           s->eecd_state.bitnum_out, s->eecd_state.reading);
 481    if (!s->eecd_state.reading ||
 482        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
 483          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
 484        ret |= E1000_EECD_DO;
 485    return ret;
 486}
 487
 488static void
 489set_eecd(E1000State *s, int index, uint32_t val)
 490{
 491    uint32_t oldval = s->eecd_state.old_eecd;
 492
 493    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
 494            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
 495    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
 496        return;
 497    }
 498    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
 499        s->eecd_state.val_in = 0;
 500        s->eecd_state.bitnum_in = 0;
 501        s->eecd_state.bitnum_out = 0;
 502        s->eecd_state.reading = 0;
 503    }
 504    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
 505        return;
 506    }
 507    if (!(E1000_EECD_SK & val)) {               /* falling edge */
 508        s->eecd_state.bitnum_out++;
 509        return;
 510    }
 511    s->eecd_state.val_in <<= 1;
 512    if (val & E1000_EECD_DI)
 513        s->eecd_state.val_in |= 1;
 514    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
 515        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
 516        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
 517            EEPROM_READ_OPCODE_MICROWIRE);
 518    }
 519    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
 520           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
 521           s->eecd_state.reading);
 522}
 523
 524static uint32_t
 525flash_eerd_read(E1000State *s, int x)
 526{
 527    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
 528
 529    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
 530        return (s->mac_reg[EERD]);
 531
 532    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
 533        return (E1000_EEPROM_RW_REG_DONE | r);
 534
 535    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
 536           E1000_EEPROM_RW_REG_DONE | r);
 537}
 538
 539static void
 540putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
 541{
 542    uint32_t sum;
 543
 544    if (cse && cse < n)
 545        n = cse + 1;
 546    if (sloc < n-1) {
 547        sum = net_checksum_add(n-css, data+css);
 548        stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
 549    }
 550}
 551
 552static inline void
 553inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
 554{
 555    if (is_broadcast_ether_addr(arr)) {
 556        e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
 557    } else if (is_multicast_ether_addr(arr)) {
 558        e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
 559    }
 560}
 561
 562static void
 563e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
 564{
 565    static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
 566                                    PTC1023, PTC1522 };
 567
 568    NetClientState *nc = qemu_get_queue(s->nic);
 569    if (s->phy_reg[MII_BMCR] & MII_BMCR_LOOPBACK) {
 570        qemu_receive_packet(nc, buf, size);
 571    } else {
 572        qemu_send_packet(nc, buf, size);
 573    }
 574    inc_tx_bcast_or_mcast_count(s, buf);
 575    e1000x_increase_size_stats(s->mac_reg, PTCregs, size + 4);
 576}
 577
 578static void
 579xmit_seg(E1000State *s)
 580{
 581    uint16_t len;
 582    unsigned int frames = s->tx.tso_frames, css, sofar;
 583    struct e1000_tx *tp = &s->tx;
 584    struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
 585
 586    if (tp->cptse) {
 587        css = props->ipcss;
 588        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
 589               frames, tp->size, css);
 590        if (props->ip) {    /* IPv4 */
 591            stw_be_p(tp->data+css+2, tp->size - css);
 592            stw_be_p(tp->data+css+4,
 593                     lduw_be_p(tp->data + css + 4) + frames);
 594        } else {         /* IPv6 */
 595            stw_be_p(tp->data+css+4, tp->size - css);
 596        }
 597        css = props->tucss;
 598        len = tp->size - css;
 599        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
 600        if (props->tcp) {
 601            sofar = frames * props->mss;
 602            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
 603            if (props->paylen - sofar > props->mss) {
 604                tp->data[css + 13] &= ~9;    /* PSH, FIN */
 605            } else if (frames) {
 606                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
 607            }
 608        } else {    /* UDP */
 609            stw_be_p(tp->data+css+4, len);
 610        }
 611        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
 612            unsigned int phsum;
 613            // add pseudo-header length before checksum calculation
 614            void *sp = tp->data + props->tucso;
 615
 616            phsum = lduw_be_p(sp) + len;
 617            phsum = (phsum >> 16) + (phsum & 0xffff);
 618            stw_be_p(sp, phsum);
 619        }
 620        tp->tso_frames++;
 621    }
 622
 623    if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
 624        putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
 625    }
 626    if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
 627        putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
 628    }
 629    if (tp->vlan_needed) {
 630        memmove(tp->vlan, tp->data, 4);
 631        memmove(tp->data, tp->data + 4, 8);
 632        memcpy(tp->data + 8, tp->vlan_header, 4);
 633        e1000_send_packet(s, tp->vlan, tp->size + 4);
 634    } else {
 635        e1000_send_packet(s, tp->data, tp->size);
 636    }
 637
 638    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
 639    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size + 4);
 640    e1000x_inc_reg_if_not_full(s->mac_reg, GPTC);
 641    e1000x_grow_8reg_if_not_full(s->mac_reg, GOTCL, s->tx.size + 4);
 642}
 643
 644static void
 645process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
 646{
 647    PCIDevice *d = PCI_DEVICE(s);
 648    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
 649    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
 650    unsigned int split_size = txd_lower & 0xffff, bytes, sz;
 651    unsigned int msh = 0xfffff;
 652    uint64_t addr;
 653    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
 654    struct e1000_tx *tp = &s->tx;
 655
 656    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
 657    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
 658        if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
 659            e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
 660            s->use_tso_for_migration = 1;
 661            tp->tso_frames = 0;
 662        } else {
 663            e1000x_read_tx_ctx_descr(xp, &tp->props);
 664            s->use_tso_for_migration = 0;
 665        }
 666        return;
 667    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
 668        // data descriptor
 669        if (tp->size == 0) {
 670            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
 671        }
 672        tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
 673    } else {
 674        // legacy descriptor
 675        tp->cptse = 0;
 676    }
 677
 678    if (e1000x_vlan_enabled(s->mac_reg) &&
 679        e1000x_is_vlan_txd(txd_lower) &&
 680        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
 681        tp->vlan_needed = 1;
 682        stw_be_p(tp->vlan_header,
 683                      le16_to_cpu(s->mac_reg[VET]));
 684        stw_be_p(tp->vlan_header + 2,
 685                      le16_to_cpu(dp->upper.fields.special));
 686    }
 687
 688    addr = le64_to_cpu(dp->buffer_addr);
 689    if (tp->cptse) {
 690        msh = tp->tso_props.hdr_len + tp->tso_props.mss;
 691        do {
 692            bytes = split_size;
 693            if (tp->size >= msh) {
 694                goto eop;
 695            }
 696            if (tp->size + bytes > msh)
 697                bytes = msh - tp->size;
 698
 699            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
 700            pci_dma_read(d, addr, tp->data + tp->size, bytes);
 701            sz = tp->size + bytes;
 702            if (sz >= tp->tso_props.hdr_len
 703                && tp->size < tp->tso_props.hdr_len) {
 704                memmove(tp->header, tp->data, tp->tso_props.hdr_len);
 705            }
 706            tp->size = sz;
 707            addr += bytes;
 708            if (sz == msh) {
 709                xmit_seg(s);
 710                memmove(tp->data, tp->header, tp->tso_props.hdr_len);
 711                tp->size = tp->tso_props.hdr_len;
 712            }
 713            split_size -= bytes;
 714        } while (bytes && split_size);
 715    } else {
 716        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
 717        pci_dma_read(d, addr, tp->data + tp->size, split_size);
 718        tp->size += split_size;
 719    }
 720
 721eop:
 722    if (!(txd_lower & E1000_TXD_CMD_EOP))
 723        return;
 724    if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
 725        xmit_seg(s);
 726    }
 727    tp->tso_frames = 0;
 728    tp->sum_needed = 0;
 729    tp->vlan_needed = 0;
 730    tp->size = 0;
 731    tp->cptse = 0;
 732}
 733
 734static uint32_t
 735txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
 736{
 737    PCIDevice *d = PCI_DEVICE(s);
 738    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
 739
 740    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
 741        return 0;
 742    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
 743                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
 744    dp->upper.data = cpu_to_le32(txd_upper);
 745    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
 746                  &dp->upper, sizeof(dp->upper));
 747    return E1000_ICR_TXDW;
 748}
 749
 750static uint64_t tx_desc_base(E1000State *s)
 751{
 752    uint64_t bah = s->mac_reg[TDBAH];
 753    uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
 754
 755    return (bah << 32) + bal;
 756}
 757
 758static void
 759start_xmit(E1000State *s)
 760{
 761    PCIDevice *d = PCI_DEVICE(s);
 762    dma_addr_t base;
 763    struct e1000_tx_desc desc;
 764    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
 765
 766    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
 767        DBGOUT(TX, "tx disabled\n");
 768        return;
 769    }
 770
 771    if (s->tx.busy) {
 772        return;
 773    }
 774    s->tx.busy = true;
 775
 776    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
 777        base = tx_desc_base(s) +
 778               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
 779        pci_dma_read(d, base, &desc, sizeof(desc));
 780
 781        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
 782               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
 783               desc.upper.data);
 784
 785        process_tx_desc(s, &desc);
 786        cause |= txdesc_writeback(s, base, &desc);
 787
 788        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
 789            s->mac_reg[TDH] = 0;
 790        /*
 791         * the following could happen only if guest sw assigns
 792         * bogus values to TDT/TDLEN.
 793         * there's nothing too intelligent we could do about this.
 794         */
 795        if (s->mac_reg[TDH] == tdh_start ||
 796            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
 797            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
 798                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
 799            break;
 800        }
 801    }
 802    s->tx.busy = false;
 803    set_ics(s, 0, cause);
 804}
 805
 806static int
 807receive_filter(E1000State *s, const uint8_t *buf, int size)
 808{
 809    uint32_t rctl = s->mac_reg[RCTL];
 810    int isbcast = is_broadcast_ether_addr(buf);
 811    int ismcast = is_multicast_ether_addr(buf);
 812
 813    if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
 814        e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
 815        uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(buf)->h_tci);
 816        uint32_t vfta =
 817            ldl_le_p((uint32_t *)(s->mac_reg + VFTA) +
 818                     ((vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK));
 819        if ((vfta & (1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK))) == 0) {
 820            return 0;
 821        }
 822    }
 823
 824    if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
 825        return 1;
 826    }
 827
 828    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
 829        return 1;
 830    }
 831
 832    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
 833        return 1;
 834    }
 835
 836    return e1000x_rx_group_filter(s->mac_reg, buf);
 837}
 838
 839static void
 840e1000_set_link_status(NetClientState *nc)
 841{
 842    E1000State *s = qemu_get_nic_opaque(nc);
 843    uint32_t old_status = s->mac_reg[STATUS];
 844
 845    if (nc->link_down) {
 846        e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
 847    } else {
 848        if (have_autoneg(s) &&
 849            !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
 850            e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
 851        } else {
 852            e1000_link_up(s);
 853        }
 854    }
 855
 856    if (s->mac_reg[STATUS] != old_status)
 857        set_ics(s, 0, E1000_ICR_LSC);
 858}
 859
 860static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
 861{
 862    int bufs;
 863    /* Fast-path short packets */
 864    if (total_size <= s->rxbuf_size) {
 865        return s->mac_reg[RDH] != s->mac_reg[RDT];
 866    }
 867    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
 868        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
 869    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
 870        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
 871            s->mac_reg[RDT] - s->mac_reg[RDH];
 872    } else {
 873        return false;
 874    }
 875    return total_size <= bufs * s->rxbuf_size;
 876}
 877
 878static bool
 879e1000_can_receive(NetClientState *nc)
 880{
 881    E1000State *s = qemu_get_nic_opaque(nc);
 882
 883    return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
 884        e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
 885}
 886
 887static uint64_t rx_desc_base(E1000State *s)
 888{
 889    uint64_t bah = s->mac_reg[RDBAH];
 890    uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
 891
 892    return (bah << 32) + bal;
 893}
 894
/*
 * Drop path for a frame that cannot be stored in the RX ring: bump the
 * "receive no buffers" (RNBC) and "missed packets" (MPC) statistics
 * (capped by e1000x_inc_reg_if_not_full) and raise the receiver-overrun
 * interrupt cause.
 */
static void
e1000_receiver_overrun(E1000State *s, size_t size)
{
    trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
    e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
    e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
    set_ics(s, 0, E1000_ICS_RXO);
}
 903
/*
 * Main receive path: filter the incoming frame, optionally strip the
 * VLAN tag, then DMA the payload into guest RX descriptors, advancing
 * RDH and raising the appropriate interrupt causes.
 *
 * Returns the consumed size (also when the frame is filtered out), 0 to
 * ask the backend to requeue (flush timer pending), or -1 on RX
 * disabled / overrun.
 */
static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[ETH_ZLEN];
    struct iovec min_iov;
    uint8_t *filter_buf = iov->iov_base;
    size_t size = iov_size(iov, iovcnt);
    size_t iov_ofs = 0;
    size_t desc_offset;
    size_t desc_size;
    size_t total_size;
    eth_pkt_types_e pkt_type;

    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
        return -1;
    }

    /* Packets arriving while a flush is scheduled stay queued in the
     * backend (return 0 = retry later). */
    if (timer_pending(s->flush_queue_timer)) {
        return 0;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen. */
        /* First fragment is too short to hold the full Ethernet header;
         * copy enough into min_buf so filtering can parse it. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if (e1000x_is_oversized(s->mac_reg, size)) {
        return size;
    }

    /* Unicast/multicast/broadcast address filtering. */
    if (!receive_filter(s, filter_buf, size)) {
        return size;
    }

    /* VLAN stripping: save the tag for the descriptor's "special"
     * field and remove the 4-byte tag from the data stream by sliding
     * the two MAC addresses (12 bytes) forward over it. */
    if (e1000x_vlan_enabled(s->mac_reg) &&
        e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
        vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            /* Skip whole fragments consumed by the 4-byte offset. */
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    pkt_type = get_eth_packet_type(PKT_GET_ETH_HDR(filter_buf));
    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    /* total_size includes the (possibly stripped) CRC length. */
    total_size = size + e1000x_fcs_len(s->mac_reg);
    if (!e1000_has_rxbufs(s, total_size)) {
        e1000_receiver_overrun(s, total_size);
        return -1;
    }
    /* Spread the frame over as many descriptors as needed, at most
     * rxbuf_size bytes per descriptor. */
    do {
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        /* DD is set last, via a separate status write below. */
        desc.status &= ~E1000_RXD_STAT_DD;
        if (desc.buffer_addr) {
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                /* Copy fragment by fragment into the guest buffer. */
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        /* Write back the descriptor, then set DD (and VP) in a second,
         * separate status write so the guest never observes DD before
         * the rest of the descriptor is valid. */
        pci_dma_write(d, base, &desc, sizeof(desc));
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        pci_dma_write(d, base + offsetof(struct e1000_rx_desc, status),
                      &desc.status, sizeof(desc.status));

        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start ||
            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            e1000_receiver_overrun(s, total_size);
            return -1;
        }
    } while (desc_offset < total_size);

    e1000x_update_rx_total_stats(s->mac_reg, pkt_type, size, total_size);

    /* Always raise RXT0; additionally raise RXDMT0 when the number of
     * remaining free descriptors falls below the configured minimum
     * threshold (RDLEN >> rxbuf_min_shift). */
    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
1052
1053static ssize_t
1054e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1055{
1056    const struct iovec iov = {
1057        .iov_base = (uint8_t *)buf,
1058        .iov_len = size
1059    };
1060
1061    return e1000_receive_iov(nc, &iov, 1);
1062}
1063
/* Default read handler: return the stored register value unchanged. */
static uint32_t
mac_readreg(E1000State *s, int index)
{
    return s->mac_reg[index];
}
1069
1070static uint32_t
1071mac_icr_read(E1000State *s, int index)
1072{
1073    uint32_t ret = s->mac_reg[ICR];
1074
1075    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1076    set_interrupt_cause(s, 0, 0);
1077    return ret;
1078}
1079
1080static uint32_t
1081mac_read_clr4(E1000State *s, int index)
1082{
1083    uint32_t ret = s->mac_reg[index];
1084
1085    s->mac_reg[index] = 0;
1086    return ret;
1087}
1088
1089static uint32_t
1090mac_read_clr8(E1000State *s, int index)
1091{
1092    uint32_t ret = s->mac_reg[index];
1093
1094    s->mac_reg[index] = 0;
1095    s->mac_reg[index-1] = 0;
1096    return ret;
1097}
1098
1099static void
1100mac_writereg(E1000State *s, int index, uint32_t val)
1101{
1102    uint32_t macaddr[2];
1103
1104    s->mac_reg[index] = val;
1105
1106    if (index == RA + 1) {
1107        macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1108        macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1109        qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1110    }
1111}
1112
/*
 * RDT write handler.  The tail register is 16 bits wide; moving it may
 * free RX descriptors, so kick the backend queue to retry any packets
 * that were deferred for lack of buffers.
 */
static void
set_rdt(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xffff;
    if (e1000_has_rxbufs(s, 1)) {
        qemu_flush_queued_packets(qemu_get_queue(s->nic));
    }
}
1121
/*
 * Generate write handlers that keep only the low `num` bits of the
 * written value, for registers narrower than 32 bits.
 */
#define LOW_BITS_SET_FUNC(num)                             \
    static void                                            \
    set_##num##bit(E1000State *s, int index, uint32_t val) \
    {                                                      \
        s->mac_reg[index] = val & (BIT(num) - 1);          \
    }

/* Instantiate set_4bit, set_11bit, set_13bit and set_16bit. */
LOW_BITS_SET_FUNC(4)
LOW_BITS_SET_FUNC(11)
LOW_BITS_SET_FUNC(13)
LOW_BITS_SET_FUNC(16)
1133
/*
 * TDLEN/RDLEN write handler: descriptor ring lengths have 128-byte
 * granularity, so only bits 19:7 are writable.
 */
static void
set_dlen(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xfff80;
}
1139
/*
 * Write handler shared by TCTL and TDT: store the value, keep the TX
 * tail within its 16-bit range, and attempt to transmit any pending
 * descriptors.
 */
static void
set_tctl(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val;
    s->mac_reg[TDT] &= 0xffff;
    start_xmit(s);
}
1147
/* ICR write handler: writing 1s clears the corresponding causes. */
static void
set_icr(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_icr %x\n", val);
    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
}
1154
/*
 * IMC write handler: mask off (disable) the given interrupt causes,
 * then reevaluate the interrupt line.
 */
static void
set_imc(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] &= ~val;
    set_ics(s, 0, 0);
}
1161
/*
 * IMS write handler: unmask (enable) the given interrupt causes, then
 * reevaluate the interrupt line.
 */
static void
set_ims(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] |= val;
    set_ics(s, 0, 0);
}
1168
#define getreg(x)    [x] = mac_readreg
typedef uint32_t (*readops)(E1000State *, int);
/*
 * Dispatch table for MMIO register reads, indexed by register offset / 4.
 * Plain registers use mac_readreg; statistics counters clear on read
 * (mac_read_clr4 / mac_read_clr8); ICR, EECD and EERD have dedicated
 * handlers.  Range designators cover the register arrays (RA, MTA,
 * VFTA, ...).  Unlisted slots are NULL and treated as unknown.
 */
static const readops macreg_readops[] = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
    getreg(GOTCL),    getreg(RDFH),     getreg(RDFT),     getreg(RDFHS),
    getreg(RDFTS),    getreg(RDFPC),    getreg(TDFH),     getreg(TDFT),
    getreg(TDFHS),    getreg(TDFTS),    getreg(TDFPC),    getreg(AIT),

    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
    [MPTC]    = mac_read_clr4,
    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
    [EERD]    = flash_eerd_read,

    [CRCERRS ... MPC]     = &mac_readreg,
    [IP6AT ... IP6AT + 3] = &mac_readreg,    [IP4AT ... IP4AT + 6] = &mac_readreg,
    [FFLT ... FFLT + 6]   = &mac_readreg,
    [RA ... RA + 31]      = &mac_readreg,
    [WUPM ... WUPM + 31]  = &mac_readreg,
    [MTA ... MTA + E1000_MC_TBL_SIZE - 1]   = &mac_readreg,
    [VFTA ... VFTA + E1000_VLAN_FILTER_TBL_SIZE - 1] = &mac_readreg,
    [FFMT ... FFMT + 254] = &mac_readreg,
    [FFVT ... FFVT + 254] = &mac_readreg,
    [PBM ... PBM + 16383] = &mac_readreg,
};
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1219
#define putreg(x)    [x] = mac_writereg
typedef void (*writeops)(E1000State *, int, uint32_t);
/*
 * Dispatch table for MMIO register writes, indexed by register
 * offset / 4.  Plain registers use mac_writereg; width-limited
 * registers use the generated set_Nbit handlers; control registers
 * with side effects (TCTL/TDT, RDT, interrupt registers, EECD, RCTL,
 * CTRL, MDIC) have dedicated handlers.  Unlisted slots are NULL and
 * treated as read-only or unknown.
 */
static const writeops macreg_writeops[] = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
    putreg(IPAV),     putreg(WUC),
    putreg(WUS),

    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL]  = set_tctl,
    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]   = set_ics,
    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]   = set_rdt,
    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]   = set_icr,
    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL]  = set_ctrl,
    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV]  = set_16bit,
    [ITR]    = set_16bit,  [TDFH]   = set_11bit,      [TDFT]  = set_11bit,
    [TDFHS]  = set_13bit,  [TDFTS]  = set_13bit,      [TDFPC] = set_13bit,
    [RDFH]   = set_13bit,  [RDFT]   = set_13bit,      [RDFHS] = set_13bit,
    [RDFTS]  = set_13bit,  [RDFPC]  = set_13bit,      [AIT]   = set_16bit,

    [IP6AT ... IP6AT + 3] = &mac_writereg, [IP4AT ... IP4AT + 6] = &mac_writereg,
    [FFLT ... FFLT + 6]   = &set_11bit,
    [RA ... RA + 31]      = &mac_writereg,
    [WUPM ... WUPM + 31]  = &mac_writereg,
    [MTA ... MTA + E1000_MC_TBL_SIZE - 1] = &mac_writereg,
    [VFTA ... VFTA + E1000_VLAN_FILTER_TBL_SIZE - 1] = &mac_writereg,
    [FFMT ... FFMT + 254] = &set_4bit,     [FFVT ... FFVT + 254] = &mac_writereg,
    [PBM ... PBM + 16383] = &mac_writereg,
};

enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1251
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implenented */
/*
 * Per-register access metadata consulted by e1000_mmio_read/write:
 * registers marked with a flag are only accessible when the matching
 * compat_flags bit is enabled; MAC_ACCESS_PARTIAL only triggers a
 * debug warning about incomplete emulation.
 */
static const uint8_t mac_reg_access[0x8000] = {
    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),

    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
    [BPTC]    = markflag(MAC),

    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1304
1305static void
1306e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1307                 unsigned size)
1308{
1309    E1000State *s = opaque;
1310    unsigned int index = (addr & 0x1ffff) >> 2;
1311
1312    if (index < NWRITEOPS && macreg_writeops[index]) {
1313        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1314            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1315            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1316                DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1317                       "It is not fully implemented.\n", index<<2);
1318            }
1319            macreg_writeops[index](s, index, val);
1320        } else {    /* "flag needed" bit is set, but the flag is not active */
1321            DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1322                   index<<2);
1323        }
1324    } else if (index < NREADOPS && macreg_readops[index]) {
1325        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1326               index<<2, val);
1327    } else {
1328        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1329               index<<2, val);
1330    }
1331}
1332
1333static uint64_t
1334e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1335{
1336    E1000State *s = opaque;
1337    unsigned int index = (addr & 0x1ffff) >> 2;
1338
1339    if (index < NREADOPS && macreg_readops[index]) {
1340        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1341            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1342            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1343                DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1344                       "It is not fully implemented.\n", index<<2);
1345            }
1346            return macreg_readops[index](s, index);
1347        } else {    /* "flag needed" bit is set, but the flag is not active */
1348            DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1349                   index<<2);
1350        }
1351    } else {
1352        DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1353    }
1354    return 0;
1355}
1356
/* MMIO BAR ops: all accesses are performed as aligned 32-bit words. */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1366
/* I/O BAR read stub: not implemented, reads return 0. */
static uint64_t e1000_io_read(void *opaque, hwaddr addr,
                              unsigned size)
{
    E1000State *s = opaque;

    (void)s;
    return 0;
}
1375
/* I/O BAR write stub: not implemented, writes are ignored. */
static void e1000_io_write(void *opaque, hwaddr addr,
                           uint64_t val, unsigned size)
{
    E1000State *s = opaque;

    (void)s;
}
1383
/* I/O BAR ops: present so the BAR exists; accesses are stubs. */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1389
/*
 * VMState field-existence predicate: true only for version 1 streams
 * (used to skip fields that newer versions dropped).
 */
static bool is_version_1(void *opaque, int version_id)
{
    return version_id == 1;
}
1394
/*
 * Migration pre-save hook: normalize autonegotiation state so link
 * status can be reconstructed on load, and pick which TX offload
 * property set goes into the main (versioned) migration structure.
 */
static int e1000_pre_save(void *opaque)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /*
     * If link is down and auto-negotiation is supported and ongoing,
     * complete auto-negotiation immediately. This allows us to look
     * at MII_BMSR_AN_COMP to infer link status on load.
     */
    if (nc->link_down && have_autoneg(s)) {
        s->phy_reg[MII_BMSR] |= MII_BMSR_AN_COMP;
    }

    /* Decide which set of props to migrate in the main structure */
    if (chkflag(TSO) || !s->use_tso_for_migration) {
        /* Either we're migrating with the extra subsection, in which
         * case the mig_props is always 'props' OR
         * we've not got the subsection, but 'props' was the last
         * updated.
         */
        s->mig_props = s->tx.props;
    } else {
        /* We're not using the subsection, and 'tso_props' was
         * the last updated.
         */
        s->mig_props = s->tx.tso_props;
    }
    return 0;
}
1425
/*
 * Migration post-load hook: reset mitigation state (clearing the MIT
 * registers when the flag is off, for compatibility with older
 * machine types), reconstruct the backend link state from STATUS/PHY
 * registers, and restore the TX offload property sets.
 */
static int e1000_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    if (!chkflag(MIT)) {
        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
            s->mac_reg[TADV] = 0;
        s->mit_irq_level = false;
    }
    s->mit_ide = 0;
    s->mit_timer_on = true;
    /* Arm the mitigation timer immediately so any interrupt state that
     * was pending at save time gets reevaluated. */
    timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in mac_reg[STATUS].
     * Alternatively, restart link negotiation if it was in progress. */
    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;

    if (have_autoneg(s) && !(s->phy_reg[MII_BMSR] & MII_BMSR_AN_COMP)) {
        nc->link_down = false;
        timer_mod(s->autoneg_timer,
                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }

    s->tx.props = s->mig_props;
    if (!s->received_tx_tso) {
        /* We received only one set of offload data (tx.props)
         * and haven't got tx.tso_props.  The best we can do
         * is dupe the data.
         */
        s->tx.tso_props = s->mig_props;
    }
    return 0;
}
1461
/*
 * Subsection post-load: record that the optional tx_tso state arrived,
 * so e1000_post_load does not need to duplicate tx.props into
 * tx.tso_props.
 */
static int e1000_tx_tso_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    s->received_tx_tso = true;
    return 0;
}
1468
/* Migrate the mitigation subsection only when the MIT flag is set. */
static bool e1000_mit_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(MIT);
}
1475
/* Migrate the full MAC-register subsection only when the MAC flag is set. */
static bool e1000_full_mac_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(MAC);
}
1482
/* Migrate the TSO subsection only when the TSO flag is set. */
static bool e1000_tso_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(TSO);
}
1489
/* Optional subsection: interrupt-mitigation registers and IRQ level. */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1504
/* Optional subsection: the entire 32K-entry MAC register file. */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1515
/* Optional subsection: the TSO context of the TX path. */
static const VMStateDescription vmstate_e1000_tx_tso_state = {
    .name = "e1000/tx_tso_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_tso_state_needed,
    .post_load = e1000_tx_tso_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
        VMSTATE_INT8(tx.tso_props.ip, E1000State),
        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1537
/*
 * Main migration descriptor.  Field order and types define the wire
 * format and must never change; new state goes in subsections.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base.  */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        VMSTATE_UINT8(mig_props.ipcss, E1000State),
        VMSTATE_UINT8(mig_props.ipcso, E1000State),
        VMSTATE_UINT16(mig_props.ipcse, E1000State),
        VMSTATE_UINT8(mig_props.tucss, E1000State),
        VMSTATE_UINT8(mig_props.tucso, E1000State),
        VMSTATE_UINT16(mig_props.tucse, E1000State),
        VMSTATE_UINT32(mig_props.paylen, E1000State),
        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
        VMSTATE_UINT16(mig_props.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(mig_props.ip, E1000State),
        VMSTATE_INT8(mig_props.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individually-migrated MAC registers (the full register file
         * travels in the e1000/full_mac_state subsection instead). */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, E1000_MC_TBL_SIZE),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA,
                                 E1000_VLAN_FILTER_TBL_SIZE),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        &vmstate_e1000_tx_tso_state,
        NULL
    }
};
1623
1624/*
1625 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1626 * Note: A valid DevId will be inserted during pci_e1000_realize().
1627 */
1628static const uint16_t e1000_eeprom_template[64] = {
1629    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1630    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1631    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1632    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1633    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1634    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1635    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1636    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1637};
1638
1639/* PCI interface */
1640
/*
 * Initialize the MMIO and I/O BAR regions.  Most of the MMIO range is
 * marked for coalesced I/O; the registers listed in excluded_regs need
 * their writes delivered immediately and are left out of the coalescing
 * ranges.  PNPMMIO_SIZE terminates the list, so the loop coalesces each
 * gap between consecutive excluded registers.
 */
static void
e1000_mmio_setup(E1000State *d)
{
    int i;
    const uint32_t excluded_regs[] = {
        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
    };

    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
                          "e1000-mmio", PNPMMIO_SIZE);
    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
                                     excluded_regs[i+1] - excluded_regs[i] - 4);
    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
}
1658
/*
 * Device teardown: release the timers created in pci_e1000_realize,
 * then unregister the NIC backend.
 */
static void
pci_e1000_uninit(PCIDevice *dev)
{
    E1000State *d = E1000(dev);

    timer_free(d->autoneg_timer);
    timer_free(d->mit_timer);
    timer_free(d->flush_queue_timer);
    qemu_del_nic(d->nic);
}
1669
/* Backend callbacks connecting this device to QEMU's net layer. */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .link_status_changed = e1000_set_link_status,
};
1678
/*
 * PCI config-space write hook: after the default handling, flush any
 * queued packets when bus mastering gets enabled, since DMA for RX
 * was blocked while it was off.
 */
static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
                                uint32_t val, int len)
{
    E1000State *s = E1000(pci_dev);

    pci_default_write_config(pci_dev, address, val, len);

    if (range_covers_byte(address, len, PCI_COMMAND) &&
        (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
        qemu_flush_queued_packets(qemu_get_queue(s->nic));
    }
}
1691
/*
 * Device realize: set up PCI config space, register the MMIO and I/O
 * BARs, populate the EEPROM (inserting the real device ID and MAC),
 * create the NIC backend and the device timers.
 */
static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
{
    DeviceState *dev = DEVICE(pci_dev);
    E1000State *d = E1000(pci_dev);
    uint8_t *pci_conf;
    uint8_t *macaddr;

    pci_dev->config_write = e1000_write_config;

    pci_conf = pci_dev->config;

    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;

    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */

    e1000_mmio_setup(d);

    /* BAR 0: memory-mapped registers; BAR 1: legacy I/O (stubbed). */
    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);

    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);

    qemu_macaddr_default_if_unset(&d->conf.macaddr);
    macaddr = d->conf.macaddr.a;

    e1000x_core_prepare_eeprom(d->eeprom_data,
                               e1000_eeprom_template,
                               sizeof(e1000_eeprom_template),
                               PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
                               macaddr);

    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
                          object_get_typename(OBJECT(d)), dev->id, d);

    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);

    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
    d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                        e1000_flush_queue_timer, d);
}
1733
/*
 * User-settable properties.  The DEFINE_PROP_BIT entries toggle bits in
 * E1000State::compat_flags; presumably they exist to preserve guest ABI
 * and migration compatibility with older machine types -- the flag
 * consumers are elsewhere in this file (NOTE(review): confirm per flag).
 */
static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
                    compat_flags, E1000_FLAG_MAC_BIT, true),
    DEFINE_PROP_BIT("migrate_tso_props", E1000State,
                    compat_flags, E1000_FLAG_TSO_BIT, true),
    DEFINE_PROP_BIT("init-vet", E1000State,
                    compat_flags, E1000_FLAG_VET_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
1748
/* Per-variant identity data, attached as class_data to each subtype. */
typedef struct E1000Info {
    const char *name;       /* QOM type name of the variant */
    uint16_t   device_id;   /* PCI device ID */
    uint8_t    revision;    /* PCI revision ID */
    uint16_t   phy_id2;     /* value exposed in the PHY ID2 register */
} E1000Info;
1755
1756static void e1000_class_init(ObjectClass *klass, void *data)
1757{
1758    DeviceClass *dc = DEVICE_CLASS(klass);
1759    ResettableClass *rc = RESETTABLE_CLASS(klass);
1760    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1761    E1000BaseClass *e = E1000_CLASS(klass);
1762    const E1000Info *info = data;
1763
1764    k->realize = pci_e1000_realize;
1765    k->exit = pci_e1000_uninit;
1766    k->romfile = "efi-e1000.rom";
1767    k->vendor_id = PCI_VENDOR_ID_INTEL;
1768    k->device_id = info->device_id;
1769    k->revision = info->revision;
1770    e->phy_id2 = info->phy_id2;
1771    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1772    rc->phases.hold = e1000_reset_hold;
1773    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1774    dc->desc = "Intel Gigabit Ethernet";
1775    dc->vmsd = &vmstate_e1000;
1776    device_class_set_props(dc, e1000_properties);
1777}
1778
1779static void e1000_instance_init(Object *obj)
1780{
1781    E1000State *n = E1000(obj);
1782    device_add_bootindex_property(obj, &n->conf.bootindex,
1783                                  "bootindex", "/ethernet-phy@0",
1784                                  DEVICE(n));
1785}
1786
/*
 * Abstract QOM base type shared by all e1000 variants; the concrete
 * subtypes are registered dynamically in e1000_register_types() below.
 */
static const TypeInfo e1000_base_info = {
    .name          = TYPE_E1000_BASE,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(E1000State),
    .instance_init = e1000_instance_init,
    .class_size    = sizeof(E1000BaseClass),
    .abstract      = true,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};
1799
/* Table of emulated chip variants; one QOM type is registered per entry. */
static const E1000Info e1000_devices[] = {
    {
        /* default variant: 82540EM */
        .name      = "e1000",
        .device_id = E1000_DEV_ID_82540EM,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
    {
        .name      = "e1000-82544gc",
        .device_id = E1000_DEV_ID_82544GC_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_82544x,
    },
    {
        .name      = "e1000-82545em",
        .device_id = E1000_DEV_ID_82545EM_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
};
1820
1821static void e1000_register_types(void)
1822{
1823    int i;
1824
1825    type_register_static(&e1000_base_info);
1826    for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1827        const E1000Info *info = &e1000_devices[i];
1828        TypeInfo type_info = {};
1829
1830        type_info.name = info->name;
1831        type_info.parent = TYPE_E1000_BASE;
1832        type_info.class_data = (void *)info;
1833        type_info.class_init = e1000_class_init;
1834
1835        type_register(&type_info);
1836    }
1837}
1838
1839type_init(e1000_register_types)
1840