qemu/hw/net/e1000.c
   1/*
   2 * QEMU e1000 emulation
   3 *
   4 * Software developer's manual:
   5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
   6 *
   7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
   8 * Copyright (c) 2008 Qumranet
   9 * Based on work done by:
  10 * Copyright (c) 2007 Dan Aloni
  11 * Copyright (c) 2004 Antony T Curtis
  12 *
  13 * This library is free software; you can redistribute it and/or
  14 * modify it under the terms of the GNU Lesser General Public
  15 * License as published by the Free Software Foundation; either
  16 * version 2 of the License, or (at your option) any later version.
  17 *
  18 * This library is distributed in the hope that it will be useful,
  19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 * Lesser General Public License for more details.
  22 *
  23 * You should have received a copy of the GNU Lesser General Public
  24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  25 */
  26
  27
  28#include "qemu/osdep.h"
  29#include "hw/pci/pci.h"
  30#include "hw/qdev-properties.h"
  31#include "migration/vmstate.h"
  32#include "net/net.h"
  33#include "net/checksum.h"
  34#include "sysemu/sysemu.h"
  35#include "sysemu/dma.h"
  36#include "qemu/iov.h"
  37#include "qemu/module.h"
  38#include "qemu/range.h"
  39
  40#include "e1000x_common.h"
  41#include "trace.h"
  42
  43static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
  44
  45/* #define E1000_DEBUG */
  46
  47#ifdef E1000_DEBUG
  48enum {
  49    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
  50    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
  51    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
  52    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
  53};
  54#define DBGBIT(x)    (1<<DEBUG_##x)
  55static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
  56
  57#define DBGOUT(what, fmt, ...) do { \
  58    if (debugflags & DBGBIT(what)) \
  59        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
  60    } while (0)
  61#else
  62#define DBGOUT(what, fmt, ...) do {} while (0)
  63#endif
  64
  65#define IOPORT_SIZE       0x40
  66#define PNPMMIO_SIZE      0x20000
  67#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
  68
  69#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
  70
  71/*
  72 * HW models:
  73 *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
  74 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
  75 *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
  76 *  Others never tested
  77 */
  78
  79typedef struct E1000State_st {
  80    /*< private >*/
  81    PCIDevice parent_obj;
  82    /*< public >*/
  83
  84    NICState *nic;
  85    NICConf conf;
  86    MemoryRegion mmio;
  87    MemoryRegion io;
  88
  89    uint32_t mac_reg[0x8000];
  90    uint16_t phy_reg[0x20];
  91    uint16_t eeprom_data[64];
  92
  93    uint32_t rxbuf_size;
  94    uint32_t rxbuf_min_shift;
  95    struct e1000_tx {
  96        unsigned char header[256];
  97        unsigned char vlan_header[4];
  98        /* Fields vlan and data must not be reordered or separated. */
  99        unsigned char vlan[4];
 100        unsigned char data[0x10000];
 101        uint16_t size;
 102        unsigned char vlan_needed;
 103        unsigned char sum_needed;
 104        bool cptse;
 105        e1000x_txd_props props;
 106        e1000x_txd_props tso_props;
 107        uint16_t tso_frames;
 108    } tx;
 109
 110    struct {
 111        uint32_t val_in;    /* shifted in from guest driver */
 112        uint16_t bitnum_in;
 113        uint16_t bitnum_out;
 114        uint16_t reading;
 115        uint32_t old_eecd;
 116    } eecd_state;
 117
 118    QEMUTimer *autoneg_timer;
 119
 120    QEMUTimer *mit_timer;      /* Mitigation timer. */
 121    bool mit_timer_on;         /* Mitigation timer is running. */
 122    bool mit_irq_level;        /* Tracks interrupt pin level. */
 123    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
 124
 125    QEMUTimer *flush_queue_timer;
 126
 127/* Compatibility flags for migration to/from qemu 1.3.0 and older */
 128#define E1000_FLAG_AUTONEG_BIT 0
 129#define E1000_FLAG_MIT_BIT 1
 130#define E1000_FLAG_MAC_BIT 2
 131#define E1000_FLAG_TSO_BIT 3
 132#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
 133#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
 134#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
 135#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
 136    uint32_t compat_flags;
 137    bool received_tx_tso;
 138    bool use_tso_for_migration;
 139    e1000x_txd_props mig_props;
 140} E1000State;
 141
 142#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
 143
 144typedef struct E1000BaseClass {
 145    PCIDeviceClass parent_class;
 146    uint16_t phy_id2;
 147} E1000BaseClass;
 148
 149#define TYPE_E1000_BASE "e1000-base"
 150
 151#define E1000(obj) \
 152    OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)
 153
 154#define E1000_DEVICE_CLASS(klass) \
 155     OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
 156#define E1000_DEVICE_GET_CLASS(obj) \
 157    OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
 158
 159static void
 160e1000_link_up(E1000State *s)
 161{
 162    e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
 163
 164    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 165    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 166}
 167
 168static void
 169e1000_autoneg_done(E1000State *s)
 170{
 171    e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
 172
 173    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 174    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 175}
 176
 177static bool
 178have_autoneg(E1000State *s)
 179{
 180    return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
 181}
 182
 183static void
 184set_phy_ctrl(E1000State *s, int index, uint16_t val)
 185{
 186    /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
 187    s->phy_reg[PHY_CTRL] = val & ~(0x3f |
 188                                   MII_CR_RESET |
 189                                   MII_CR_RESTART_AUTO_NEG);
 190
 191    /*
 192     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
 193     * migrate during auto negotiation, after migration the link will be
 194     * down.
 195     */
 196    if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
 197        e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
 198    }
 199}
 200
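/*
 * PHY registers whose writes have side effects are dispatched through this
 * table from set_mdic(); writes to any other writable PHY register are
 * stored directly into phy_reg[].
 */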
 201static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
 202    [PHY_CTRL] = set_phy_ctrl,
 203};
 204
 205enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
 206
 207enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
 208static const char phy_regcap[0x20] = {
 209    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
 210    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
 211    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
 212    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
 213    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
 214    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
 215    [PHY_AUTONEG_EXP] = PHY_R,
 216};
 217
  218/* PHY_ID2 documented in 8254x_GBe_SDM.pdf, p. 250 */
 219static const uint16_t phy_reg_init[] = {
 220    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
 221                   MII_CR_FULL_DUPLEX |
 222                   MII_CR_AUTO_NEG_EN,
 223
 224    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
 225                   MII_SR_LINK_STATUS |   /* link initially up */
 226                   MII_SR_AUTONEG_CAPS |
 227                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
 228                   MII_SR_PREAMBLE_SUPPRESS |
 229                   MII_SR_EXTENDED_STATUS |
 230                   MII_SR_10T_HD_CAPS |
 231                   MII_SR_10T_FD_CAPS |
 232                   MII_SR_100X_HD_CAPS |
 233                   MII_SR_100X_FD_CAPS,
 234
 235    [PHY_ID1] = 0x141,
 236    /* [PHY_ID2] configured per DevId, from e1000_reset() */
 237    [PHY_AUTONEG_ADV] = 0xde1,
 238    [PHY_LP_ABILITY] = 0x1e0,
 239    [PHY_1000T_CTRL] = 0x0e00,
 240    [PHY_1000T_STATUS] = 0x3c00,
 241    [M88E1000_PHY_SPEC_CTRL] = 0x360,
 242    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
 243    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
 244};
 245
 246static const uint32_t mac_reg_init[] = {
 247    [PBA]     = 0x00100030,
 248    [LEDCTL]  = 0x602,
 249    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
 250                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
 251    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
 252                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
 253                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
 254                E1000_STATUS_LU,
 255    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
 256                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
 257                E1000_MANC_RMCP_EN,
 258};
 259
  260/* Helper function: *curr == 0 means the value is not set */
 261static inline void
 262mit_update_delay(uint32_t *curr, uint32_t value)
 263{
 264    if (value && (*curr == 0 || value < *curr)) {
 265        *curr = value;
 266    }
 267}
 268
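/*
 * set_interrupt_cause() stores the new cause bits in ICR (mirrored into
 * ICS) and raises or lowers the PCI interrupt line according to
 * IMS & ICR.  When the MIT compat flag is set, a rising edge may instead
 * arm mit_timer: the delay is the smallest applicable nonzero value of
 * TADV/RADV (1024 ns units) and ITR (256 ns units), floored at 500 ticks.
 * For example, TADV = 256 on a transmit-done interrupt gives
 * 256 * 4 * 256 ns = 262144 ns, and the 500-tick floor (128 us) caps the
 * rate at roughly 7813 interrupts/sec as noted in the comment below.
 */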
 269static void
 270set_interrupt_cause(E1000State *s, int index, uint32_t val)
 271{
 272    PCIDevice *d = PCI_DEVICE(s);
 273    uint32_t pending_ints;
 274    uint32_t mit_delay;
 275
 276    s->mac_reg[ICR] = val;
 277
 278    /*
 279     * Make sure ICR and ICS registers have the same value.
   280     * The spec says that the ICS register is write-only.  However, in practice,
 281     * on real hardware ICS is readable, and for reads it has the same value as
 282     * ICR (except that ICS does not have the clear on read behaviour of ICR).
 283     *
 284     * The VxWorks PRO/1000 driver uses this behaviour.
 285     */
 286    s->mac_reg[ICS] = val;
 287
 288    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
 289    if (!s->mit_irq_level && pending_ints) {
 290        /*
   291         * Here we detect a potential rising edge. We postpone raising the
 292         * interrupt line if we are inside the mitigation delay window
 293         * (s->mit_timer_on == 1).
 294         * We provide a partial implementation of interrupt mitigation,
 295         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
 296         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
 297         * RADV; relative timers based on TIDV and RDTR are not implemented.
 298         */
 299        if (s->mit_timer_on) {
 300            return;
 301        }
 302        if (chkflag(MIT)) {
 303            /* Compute the next mitigation delay according to pending
 304             * interrupts and the current values of RADV (provided
 305             * RDTR!=0), TADV and ITR.
 306             * Then rearm the timer.
 307             */
 308            mit_delay = 0;
 309            if (s->mit_ide &&
 310                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
 311                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
 312            }
 313            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
 314                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
 315            }
 316            mit_update_delay(&mit_delay, s->mac_reg[ITR]);
 317
 318            /*
 319             * According to e1000 SPEC, the Ethernet controller guarantees
 320             * a maximum observable interrupt rate of 7813 interrupts/sec.
 321             * Thus if mit_delay < 500 then the delay should be set to the
 322             * minimum delay possible which is 500.
 323             */
 324            mit_delay = (mit_delay < 500) ? 500 : mit_delay;
 325
 326            s->mit_timer_on = 1;
 327            timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 328                      mit_delay * 256);
 329            s->mit_ide = 0;
 330        }
 331    }
 332
 333    s->mit_irq_level = (pending_ints != 0);
 334    pci_set_irq(d, s->mit_irq_level);
 335}
 336
 337static void
 338e1000_mit_timer(void *opaque)
 339{
 340    E1000State *s = opaque;
 341
 342    s->mit_timer_on = 0;
 343    /* Call set_interrupt_cause to update the irq level (if necessary). */
 344    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
 345}
 346
 347static void
 348set_ics(E1000State *s, int index, uint32_t val)
 349{
 350    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
 351        s->mac_reg[IMS]);
 352    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
 353}
 354
 355static void
 356e1000_autoneg_timer(void *opaque)
 357{
 358    E1000State *s = opaque;
 359    if (!qemu_get_queue(s->nic)->link_down) {
 360        e1000_autoneg_done(s);
 361        set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
 362    }
 363}
 364
 365static void e1000_reset(void *opaque)
 366{
 367    E1000State *d = opaque;
 368    E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
 369    uint8_t *macaddr = d->conf.macaddr.a;
 370
 371    timer_del(d->autoneg_timer);
 372    timer_del(d->mit_timer);
 373    timer_del(d->flush_queue_timer);
 374    d->mit_timer_on = 0;
 375    d->mit_irq_level = 0;
 376    d->mit_ide = 0;
 377    memset(d->phy_reg, 0, sizeof d->phy_reg);
 378    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
 379    d->phy_reg[PHY_ID2] = edc->phy_id2;
 380    memset(d->mac_reg, 0, sizeof d->mac_reg);
 381    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
 382    d->rxbuf_min_shift = 1;
 383    memset(&d->tx, 0, sizeof d->tx);
 384
 385    if (qemu_get_queue(d->nic)->link_down) {
 386        e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
 387    }
 388
 389    e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
 390}
 391
 392static void
 393set_ctrl(E1000State *s, int index, uint32_t val)
 394{
 395    /* RST is self clearing */
 396    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
 397}
 398
 399static void
 400e1000_flush_queue_timer(void *opaque)
 401{
 402    E1000State *s = opaque;
 403
 404    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 405}
 406
 407static void
 408set_rx_control(E1000State *s, int index, uint32_t val)
 409{
 410    s->mac_reg[RCTL] = val;
 411    s->rxbuf_size = e1000x_rxbufsize(val);
 412    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
 413    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
 414           s->mac_reg[RCTL]);
 415    timer_mod(s->flush_queue_timer,
 416              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
 417}
 418
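/*
 * set_mdic() emulates the MDIO interface.  Only PHY address 1 is present;
 * reads return the phy_reg[] contents, writes go through phyreg_writeops[]
 * or straight into phy_reg[], accesses to unimplemented registers set the
 * ERROR bit, and READY is always reported.  An MDAC interrupt is raised
 * when the guest asked for one.
 */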
 419static void
 420set_mdic(E1000State *s, int index, uint32_t val)
 421{
 422    uint32_t data = val & E1000_MDIC_DATA_MASK;
 423    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
 424
 425    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
 426        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
 427    else if (val & E1000_MDIC_OP_READ) {
 428        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
 429        if (!(phy_regcap[addr] & PHY_R)) {
 430            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
 431            val |= E1000_MDIC_ERROR;
 432        } else
 433            val = (val ^ data) | s->phy_reg[addr];
 434    } else if (val & E1000_MDIC_OP_WRITE) {
 435        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
 436        if (!(phy_regcap[addr] & PHY_W)) {
 437            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
 438            val |= E1000_MDIC_ERROR;
 439        } else {
 440            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
 441                phyreg_writeops[addr](s, index, data);
 442            } else {
 443                s->phy_reg[addr] = data;
 444            }
 445        }
 446    }
 447    s->mac_reg[MDIC] = val | E1000_MDIC_READY;
 448
 449    if (val & E1000_MDIC_INT_EN) {
 450        set_ics(s, 0, E1000_ICR_MDAC);
 451    }
 452}
 453
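/*
 * get_eecd()/set_eecd() bit-bang the Microwire EEPROM protocol through the
 * EECD register.  After CS rises, each rising SK edge shifts one DI bit in;
 * once 9 bits have arrived (opcode in the top three, word address in the
 * low six), a READ opcode makes subsequent clock cycles shift the addressed
 * 16-bit word of eeprom_data[] out on DO, most significant bit first.
 */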
 454static uint32_t
 455get_eecd(E1000State *s, int index)
 456{
 457    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
 458
 459    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
 460           s->eecd_state.bitnum_out, s->eecd_state.reading);
 461    if (!s->eecd_state.reading ||
 462        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
 463          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
 464        ret |= E1000_EECD_DO;
 465    return ret;
 466}
 467
 468static void
 469set_eecd(E1000State *s, int index, uint32_t val)
 470{
 471    uint32_t oldval = s->eecd_state.old_eecd;
 472
 473    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
 474            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
 475    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
 476        return;
 477    }
   478    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rising edge; reset state */
 479        s->eecd_state.val_in = 0;
 480        s->eecd_state.bitnum_in = 0;
 481        s->eecd_state.bitnum_out = 0;
 482        s->eecd_state.reading = 0;
 483    }
 484    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
 485        return;
 486    }
 487    if (!(E1000_EECD_SK & val)) {               /* falling edge */
 488        s->eecd_state.bitnum_out++;
 489        return;
 490    }
 491    s->eecd_state.val_in <<= 1;
 492    if (val & E1000_EECD_DI)
 493        s->eecd_state.val_in |= 1;
 494    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
 495        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
 496        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
 497            EEPROM_READ_OPCODE_MICROWIRE);
 498    }
 499    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
 500           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
 501           s->eecd_state.reading);
 502}
 503
 504static uint32_t
 505flash_eerd_read(E1000State *s, int x)
 506{
 507    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
 508
 509    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
 510        return (s->mac_reg[EERD]);
 511
 512    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
 513        return (E1000_EEPROM_RW_REG_DONE | r);
 514
 515    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
 516           E1000_EEPROM_RW_REG_DONE | r);
 517}
 518
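/*
 * putsum() computes the ones'-complement Internet checksum over
 * data[css..n) (bounded by the checksum end 'cse' when it is nonzero) and
 * stores the result big-endian at offset 'sloc'.
 */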
 519static void
 520putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
 521{
 522    uint32_t sum;
 523
 524    if (cse && cse < n)
 525        n = cse + 1;
 526    if (sloc < n-1) {
 527        sum = net_checksum_add(n-css, data+css);
 528        stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
 529    }
 530}
 531
 532static inline void
 533inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
 534{
 535    if (!memcmp(arr, bcast, sizeof bcast)) {
 536        e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
 537    } else if (arr[0] & 1) {
 538        e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
 539    }
 540}
 541
 542static void
 543e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
 544{
 545    static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
 546                                    PTC1023, PTC1522 };
 547
 548    NetClientState *nc = qemu_get_queue(s->nic);
 549    if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
 550        nc->info->receive(nc, buf, size);
 551    } else {
 552        qemu_send_packet(nc, buf, size);
 553    }
 554    inc_tx_bcast_or_mcast_count(s, buf);
 555    e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
 556}
 557
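/*
 * xmit_seg() sends the frame accumulated in tx.data.  For TSO segments it
 * patches the IP total/payload length and identification, advances the TCP
 * sequence number by the payload already sent, clears PSH/FIN on all but
 * the last segment and folds the segment length into the pseudo-header
 * checksum.  It then inserts the requested TCP/UDP and IP checksums,
 * inserts the VLAN tag when needed, hands the frame to e1000_send_packet()
 * and updates the transmit statistics counters.
 */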
 558static void
 559xmit_seg(E1000State *s)
 560{
 561    uint16_t len;
 562    unsigned int frames = s->tx.tso_frames, css, sofar;
 563    struct e1000_tx *tp = &s->tx;
 564    struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
 565
 566    if (tp->cptse) {
 567        css = props->ipcss;
 568        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
 569               frames, tp->size, css);
 570        if (props->ip) {    /* IPv4 */
 571            stw_be_p(tp->data+css+2, tp->size - css);
 572            stw_be_p(tp->data+css+4,
 573                     lduw_be_p(tp->data + css + 4) + frames);
 574        } else {         /* IPv6 */
 575            stw_be_p(tp->data+css+4, tp->size - css);
 576        }
 577        css = props->tucss;
 578        len = tp->size - css;
 579        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
 580        if (props->tcp) {
 581            sofar = frames * props->mss;
 582            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
 583            if (props->paylen - sofar > props->mss) {
 584                tp->data[css + 13] &= ~9;    /* PSH, FIN */
 585            } else if (frames) {
 586                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
 587            }
 588        } else {    /* UDP */
 589            stw_be_p(tp->data+css+4, len);
 590        }
 591        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
 592            unsigned int phsum;
 593            // add pseudo-header length before checksum calculation
 594            void *sp = tp->data + props->tucso;
 595
 596            phsum = lduw_be_p(sp) + len;
 597            phsum = (phsum >> 16) + (phsum & 0xffff);
 598            stw_be_p(sp, phsum);
 599        }
 600        tp->tso_frames++;
 601    }
 602
 603    if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
 604        putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
 605    }
 606    if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
 607        putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
 608    }
 609    if (tp->vlan_needed) {
 610        memmove(tp->vlan, tp->data, 4);
 611        memmove(tp->data, tp->data + 4, 8);
 612        memcpy(tp->data + 8, tp->vlan_header, 4);
 613        e1000_send_packet(s, tp->vlan, tp->size + 4);
 614    } else {
 615        e1000_send_packet(s, tp->data, tp->size);
 616    }
 617
 618    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
 619    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
 620    s->mac_reg[GPTC] = s->mac_reg[TPT];
 621    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
 622    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
 623}
 624
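/*
 * process_tx_desc() handles a single descriptor from the TX ring.  Context
 * descriptors (DEXT set, DTYP_D clear) only latch offload parameters into
 * tx.props or tx.tso_props.  Data and legacy descriptors DMA their buffer
 * into tx.data; with TSE set the payload is cut into segments of
 * hdr_len + mss bytes, each passed to xmit_seg() with the saved header
 * prepended again.  The accumulated state is reset once EOP is seen.
 */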
 625static void
 626process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
 627{
 628    PCIDevice *d = PCI_DEVICE(s);
 629    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
 630    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
 631    unsigned int split_size = txd_lower & 0xffff, bytes, sz;
 632    unsigned int msh = 0xfffff;
 633    uint64_t addr;
 634    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
 635    struct e1000_tx *tp = &s->tx;
 636
 637    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
 638    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
 639        if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
 640            e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
 641            s->use_tso_for_migration = 1;
 642            tp->tso_frames = 0;
 643        } else {
 644            e1000x_read_tx_ctx_descr(xp, &tp->props);
 645            s->use_tso_for_migration = 0;
 646        }
 647        return;
 648    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
 649        // data descriptor
 650        if (tp->size == 0) {
 651            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
 652        }
 653        tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
 654    } else {
 655        // legacy descriptor
 656        tp->cptse = 0;
 657    }
 658
 659    if (e1000x_vlan_enabled(s->mac_reg) &&
 660        e1000x_is_vlan_txd(txd_lower) &&
 661        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
 662        tp->vlan_needed = 1;
 663        stw_be_p(tp->vlan_header,
 664                      le16_to_cpu(s->mac_reg[VET]));
 665        stw_be_p(tp->vlan_header + 2,
 666                      le16_to_cpu(dp->upper.fields.special));
 667    }
 668
 669    addr = le64_to_cpu(dp->buffer_addr);
 670    if (tp->cptse) {
 671        msh = tp->tso_props.hdr_len + tp->tso_props.mss;
 672        do {
 673            bytes = split_size;
 674            if (tp->size + bytes > msh)
 675                bytes = msh - tp->size;
 676
 677            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
 678            pci_dma_read(d, addr, tp->data + tp->size, bytes);
 679            sz = tp->size + bytes;
 680            if (sz >= tp->tso_props.hdr_len
 681                && tp->size < tp->tso_props.hdr_len) {
 682                memmove(tp->header, tp->data, tp->tso_props.hdr_len);
 683            }
 684            tp->size = sz;
 685            addr += bytes;
 686            if (sz == msh) {
 687                xmit_seg(s);
 688                memmove(tp->data, tp->header, tp->tso_props.hdr_len);
 689                tp->size = tp->tso_props.hdr_len;
 690            }
 691            split_size -= bytes;
 692        } while (bytes && split_size);
 693    } else {
 694        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
 695        pci_dma_read(d, addr, tp->data + tp->size, split_size);
 696        tp->size += split_size;
 697    }
 698
 699    if (!(txd_lower & E1000_TXD_CMD_EOP))
 700        return;
 701    if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
 702        xmit_seg(s);
 703    }
 704    tp->tso_frames = 0;
 705    tp->sum_needed = 0;
 706    tp->vlan_needed = 0;
 707    tp->size = 0;
 708    tp->cptse = 0;
 709}
 710
 711static uint32_t
 712txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
 713{
 714    PCIDevice *d = PCI_DEVICE(s);
 715    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
 716
 717    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
 718        return 0;
 719    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
 720                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
 721    dp->upper.data = cpu_to_le32(txd_upper);
 722    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
 723                  &dp->upper, sizeof(dp->upper));
 724    return E1000_ICR_TXDW;
 725}
 726
 727static uint64_t tx_desc_base(E1000State *s)
 728{
 729    uint64_t bah = s->mac_reg[TDBAH];
 730    uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
 731
 732    return (bah << 32) + bal;
 733}
 734
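/*
 * start_xmit() walks the TX ring from TDH towards TDT, reading each
 * descriptor over DMA, feeding it to process_tx_desc() and writing back the
 * DD status when the descriptor requests it.  It bails out on a ring
 * wraparound caused by bogus TDT/TDLEN values and finally raises the
 * accumulated TXQE/TXDW interrupt causes.
 */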
 735static void
 736start_xmit(E1000State *s)
 737{
 738    PCIDevice *d = PCI_DEVICE(s);
 739    dma_addr_t base;
 740    struct e1000_tx_desc desc;
 741    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
 742
 743    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
 744        DBGOUT(TX, "tx disabled\n");
 745        return;
 746    }
 747
 748    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
 749        base = tx_desc_base(s) +
 750               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
 751        pci_dma_read(d, base, &desc, sizeof(desc));
 752
 753        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
 754               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
 755               desc.upper.data);
 756
 757        process_tx_desc(s, &desc);
 758        cause |= txdesc_writeback(s, base, &desc);
 759
 760        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
 761            s->mac_reg[TDH] = 0;
 762        /*
   763         * The following could happen only if the guest software assigns
   764         * bogus values to TDT/TDLEN.
   765         * There's nothing too intelligent we can do about this.
 766         */
 767        if (s->mac_reg[TDH] == tdh_start ||
 768            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
 769            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
 770                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
 771            break;
 772        }
 773    }
 774    set_ics(s, 0, cause);
 775}
 776
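/*
 * receive_filter() returns nonzero if the frame should be accepted: VLAN
 * tagged frames must hit the VFTA table when VLAN filtering is enabled;
 * after that, unicast promiscuous mode, multicast promiscuous mode,
 * broadcast acceptance or the remaining address filters
 * (e1000x_rx_group_filter()) decide.
 */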
 777static int
 778receive_filter(E1000State *s, const uint8_t *buf, int size)
 779{
 780    uint32_t rctl = s->mac_reg[RCTL];
 781    int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
 782
 783    if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
 784        e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
 785        uint16_t vid = lduw_be_p(buf + 14);
 786        uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
 787                                 ((vid >> 5) & 0x7f));
 788        if ((vfta & (1 << (vid & 0x1f))) == 0)
 789            return 0;
 790    }
 791
 792    if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
 793        return 1;
 794    }
 795
 796    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
 797        e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
 798        return 1;
 799    }
 800
 801    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
 802        e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
 803        return 1;
 804    }
 805
 806    return e1000x_rx_group_filter(s->mac_reg, buf);
 807}
 808
 809static void
 810e1000_set_link_status(NetClientState *nc)
 811{
 812    E1000State *s = qemu_get_nic_opaque(nc);
 813    uint32_t old_status = s->mac_reg[STATUS];
 814
 815    if (nc->link_down) {
 816        e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
 817    } else {
 818        if (have_autoneg(s) &&
 819            !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
 820            e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
 821        } else {
 822            e1000_link_up(s);
 823        }
 824    }
 825
 826    if (s->mac_reg[STATUS] != old_status)
 827        set_ics(s, 0, E1000_ICR_LSC);
 828}
 829
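/*
 * e1000_has_rxbufs() checks whether the descriptors between RDH and RDT
 * provide enough rxbuf_size-sized buffers for total_size bytes; an empty
 * ring (RDH == RDT) has no buffers at all.
 */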
 830static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
 831{
 832    int bufs;
 833    /* Fast-path short packets */
 834    if (total_size <= s->rxbuf_size) {
 835        return s->mac_reg[RDH] != s->mac_reg[RDT];
 836    }
 837    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
 838        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
 839    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
 840        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
 841            s->mac_reg[RDT] - s->mac_reg[RDH];
 842    } else {
 843        return false;
 844    }
 845    return total_size <= bufs * s->rxbuf_size;
 846}
 847
 848static bool
 849e1000_can_receive(NetClientState *nc)
 850{
 851    E1000State *s = qemu_get_nic_opaque(nc);
 852
 853    return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
 854        e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
 855}
 856
 857static uint64_t rx_desc_base(E1000State *s)
 858{
 859    uint64_t bah = s->mac_reg[RDBAH];
 860    uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
 861
 862    return (bah << 32) + bal;
 863}
 864
 865static void
 866e1000_receiver_overrun(E1000State *s, size_t size)
 867{
 868    trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
 869    e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
 870    e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
 871    set_ics(s, 0, E1000_ICS_RXO);
 872}
 873
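/*
 * e1000_receive_iov() is the RX data path.  Frames shorter than 60 bytes
 * are zero-padded, oversized frames and frames rejected by receive_filter()
 * are dropped, and a matching VLAN tag is stripped into the descriptor's
 * special field.  The payload is then DMA-copied into as many RX
 * descriptors as needed (up to rxbuf_size bytes each), DD/EOP status is
 * set, RDH advances, the receive statistics are updated and RXT0 (plus
 * RXDMT0 when free descriptors fall below the threshold) is signalled.
 */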
 874static ssize_t
 875e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
 876{
 877    E1000State *s = qemu_get_nic_opaque(nc);
 878    PCIDevice *d = PCI_DEVICE(s);
 879    struct e1000_rx_desc desc;
 880    dma_addr_t base;
 881    unsigned int n, rdt;
 882    uint32_t rdh_start;
 883    uint16_t vlan_special = 0;
 884    uint8_t vlan_status = 0;
 885    uint8_t min_buf[MIN_BUF_SIZE];
 886    struct iovec min_iov;
 887    uint8_t *filter_buf = iov->iov_base;
 888    size_t size = iov_size(iov, iovcnt);
 889    size_t iov_ofs = 0;
 890    size_t desc_offset;
 891    size_t desc_size;
 892    size_t total_size;
 893
 894    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
 895        return -1;
 896    }
 897
 898    if (timer_pending(s->flush_queue_timer)) {
 899        return 0;
 900    }
 901
 902    /* Pad to minimum Ethernet frame length */
 903    if (size < sizeof(min_buf)) {
 904        iov_to_buf(iov, iovcnt, 0, min_buf, size);
 905        memset(&min_buf[size], 0, sizeof(min_buf) - size);
 906        min_iov.iov_base = filter_buf = min_buf;
 907        min_iov.iov_len = size = sizeof(min_buf);
 908        iovcnt = 1;
 909        iov = &min_iov;
 910    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
 911        /* This is very unlikely, but may happen. */
 912        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
 913        filter_buf = min_buf;
 914    }
 915
 916    /* Discard oversized packets if !LPE and !SBP. */
 917    if (e1000x_is_oversized(s->mac_reg, size)) {
 918        return size;
 919    }
 920
 921    if (!receive_filter(s, filter_buf, size)) {
 922        return size;
 923    }
 924
 925    if (e1000x_vlan_enabled(s->mac_reg) &&
 926        e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
 927        vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
 928        iov_ofs = 4;
 929        if (filter_buf == iov->iov_base) {
 930            memmove(filter_buf + 4, filter_buf, 12);
 931        } else {
 932            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
 933            while (iov->iov_len <= iov_ofs) {
 934                iov_ofs -= iov->iov_len;
 935                iov++;
 936            }
 937        }
 938        vlan_status = E1000_RXD_STAT_VP;
 939        size -= 4;
 940    }
 941
 942    rdh_start = s->mac_reg[RDH];
 943    desc_offset = 0;
 944    total_size = size + e1000x_fcs_len(s->mac_reg);
 945    if (!e1000_has_rxbufs(s, total_size)) {
 946        e1000_receiver_overrun(s, total_size);
 947        return -1;
 948    }
 949    do {
 950        desc_size = total_size - desc_offset;
 951        if (desc_size > s->rxbuf_size) {
 952            desc_size = s->rxbuf_size;
 953        }
 954        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
 955        pci_dma_read(d, base, &desc, sizeof(desc));
 956        desc.special = vlan_special;
 957        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
 958        if (desc.buffer_addr) {
 959            if (desc_offset < size) {
 960                size_t iov_copy;
 961                hwaddr ba = le64_to_cpu(desc.buffer_addr);
 962                size_t copy_size = size - desc_offset;
 963                if (copy_size > s->rxbuf_size) {
 964                    copy_size = s->rxbuf_size;
 965                }
 966                do {
 967                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
 968                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
 969                    copy_size -= iov_copy;
 970                    ba += iov_copy;
 971                    iov_ofs += iov_copy;
 972                    if (iov_ofs == iov->iov_len) {
 973                        iov++;
 974                        iov_ofs = 0;
 975                    }
 976                } while (copy_size);
 977            }
 978            desc_offset += desc_size;
 979            desc.length = cpu_to_le16(desc_size);
 980            if (desc_offset >= total_size) {
 981                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
 982            } else {
 983                /* Guest zeroing out status is not a hardware requirement.
 984                   Clear EOP in case guest didn't do it. */
 985                desc.status &= ~E1000_RXD_STAT_EOP;
 986            }
 987        } else { // as per intel docs; skip descriptors with null buf addr
 988            DBGOUT(RX, "Null RX descriptor!!\n");
 989        }
 990        pci_dma_write(d, base, &desc, sizeof(desc));
 991
 992        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
 993            s->mac_reg[RDH] = 0;
 994        /* see comment in start_xmit; same here */
 995        if (s->mac_reg[RDH] == rdh_start ||
 996            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
 997            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
 998                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
 999            e1000_receiver_overrun(s, total_size);
1000            return -1;
1001        }
1002    } while (desc_offset < total_size);
1003
1004    e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
1005
1006    n = E1000_ICS_RXT0;
1007    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1008        rdt += s->mac_reg[RDLEN] / sizeof(desc);
1009    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1010        s->rxbuf_min_shift)
1011        n |= E1000_ICS_RXDMT0;
1012
1013    set_ics(s, 0, n);
1014
1015    return size;
1016}
1017
1018static ssize_t
1019e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1020{
1021    const struct iovec iov = {
1022        .iov_base = (uint8_t *)buf,
1023        .iov_len = size
1024    };
1025
1026    return e1000_receive_iov(nc, &iov, 1);
1027}
1028
1029static uint32_t
1030mac_readreg(E1000State *s, int index)
1031{
1032    return s->mac_reg[index];
1033}
1034
1035static uint32_t
1036mac_low4_read(E1000State *s, int index)
1037{
1038    return s->mac_reg[index] & 0xf;
1039}
1040
1041static uint32_t
1042mac_low11_read(E1000State *s, int index)
1043{
1044    return s->mac_reg[index] & 0x7ff;
1045}
1046
1047static uint32_t
1048mac_low13_read(E1000State *s, int index)
1049{
1050    return s->mac_reg[index] & 0x1fff;
1051}
1052
1053static uint32_t
1054mac_low16_read(E1000State *s, int index)
1055{
1056    return s->mac_reg[index] & 0xffff;
1057}
1058
1059static uint32_t
1060mac_icr_read(E1000State *s, int index)
1061{
1062    uint32_t ret = s->mac_reg[ICR];
1063
1064    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1065    set_interrupt_cause(s, 0, 0);
1066    return ret;
1067}
1068
1069static uint32_t
1070mac_read_clr4(E1000State *s, int index)
1071{
1072    uint32_t ret = s->mac_reg[index];
1073
1074    s->mac_reg[index] = 0;
1075    return ret;
1076}
1077
1078static uint32_t
1079mac_read_clr8(E1000State *s, int index)
1080{
1081    uint32_t ret = s->mac_reg[index];
1082
1083    s->mac_reg[index] = 0;
1084    s->mac_reg[index-1] = 0;
1085    return ret;
1086}
1087
1088static void
1089mac_writereg(E1000State *s, int index, uint32_t val)
1090{
1091    uint32_t macaddr[2];
1092
1093    s->mac_reg[index] = val;
1094
1095    if (index == RA + 1) {
1096        macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1097        macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1098        qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1099    }
1100}
1101
1102static void
1103set_rdt(E1000State *s, int index, uint32_t val)
1104{
1105    s->mac_reg[index] = val & 0xffff;
1106    if (e1000_has_rxbufs(s, 1)) {
1107        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1108    }
1109}
1110
1111static void
1112set_16bit(E1000State *s, int index, uint32_t val)
1113{
1114    s->mac_reg[index] = val & 0xffff;
1115}
1116
1117static void
1118set_dlen(E1000State *s, int index, uint32_t val)
1119{
1120    s->mac_reg[index] = val & 0xfff80;
1121}
1122
1123static void
1124set_tctl(E1000State *s, int index, uint32_t val)
1125{
1126    s->mac_reg[index] = val;
1127    s->mac_reg[TDT] &= 0xffff;
1128    start_xmit(s);
1129}
1130
1131static void
1132set_icr(E1000State *s, int index, uint32_t val)
1133{
1134    DBGOUT(INTERRUPT, "set_icr %x\n", val);
1135    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1136}
1137
1138static void
1139set_imc(E1000State *s, int index, uint32_t val)
1140{
1141    s->mac_reg[IMS] &= ~val;
1142    set_ics(s, 0, 0);
1143}
1144
1145static void
1146set_ims(E1000State *s, int index, uint32_t val)
1147{
1148    s->mac_reg[IMS] |= val;
1149    set_ics(s, 0, 0);
1150}
1151
1152#define getreg(x)    [x] = mac_readreg
1153typedef uint32_t (*readops)(E1000State *, int);
1154static const readops macreg_readops[] = {
1155    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1156    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1157    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1158    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1159    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1160    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1161    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1162    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1163    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1164    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1165    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
1166    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1167    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1168    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
1169    getreg(GOTCL),
1170
1171    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1172    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
1173    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
1174    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
1175    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
1176    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
1177    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
1178    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
1179    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1180    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1181    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
1182    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
1183    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
1184    [MPTC]    = mac_read_clr4,
1185    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1186    [EERD]    = flash_eerd_read,
1187    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
1188    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
1189    [RDFPC]   = mac_low13_read,
1190    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
1191    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
1192    [TDFPC]   = mac_low13_read,
1193    [AIT]     = mac_low16_read,
1194
1195    [CRCERRS ... MPC]   = &mac_readreg,
1196    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
1197    [FFLT ... FFLT+6]   = &mac_low11_read,
1198    [RA ... RA+31]      = &mac_readreg,
1199    [WUPM ... WUPM+31]  = &mac_readreg,
1200    [MTA ... MTA+127]   = &mac_readreg,
1201    [VFTA ... VFTA+127] = &mac_readreg,
1202    [FFMT ... FFMT+254] = &mac_low4_read,
1203    [FFVT ... FFVT+254] = &mac_readreg,
1204    [PBM ... PBM+16383] = &mac_readreg,
1205};
1206enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1207
1208#define putreg(x)    [x] = mac_writereg
1209typedef void (*writeops)(E1000State *, int, uint32_t);
1210static const writeops macreg_writeops[] = {
1211    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1212    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1213    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1214    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
1215    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
1216    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
1217    putreg(WUS),      putreg(AIT),
1218
1219    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
1220    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
1221    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
1222    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
1223    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
1224    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
1225    [ITR]    = set_16bit,
1226
1227    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
1228    [FFLT ... FFLT+6]   = &mac_writereg,
1229    [RA ... RA+31]      = &mac_writereg,
1230    [WUPM ... WUPM+31]  = &mac_writereg,
1231    [MTA ... MTA+127]   = &mac_writereg,
1232    [VFTA ... VFTA+127] = &mac_writereg,
1233    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
1234    [PBM ... PBM+16383] = &mac_writereg,
1235};
1236
1237enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1238
1239enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1240
1241#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
1242/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1243 * f - flag bits (up to 6 possible flags)
1244 * n - flag needed
 1245 * p - partially implemented */
1246static const uint8_t mac_reg_access[0x8000] = {
1247    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
1248    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),
1249
1250    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
1251    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
1252    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
1253    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
1254    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
1255    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
1256    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
1257    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
1258    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
1259    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
1260    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
1261    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
1262    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
1263    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
1264    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
1265    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
1266    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
1267    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
1268    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
1269    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
1270    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
1271    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
1272    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
1273    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
1274    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
1275    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
1276    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
1277    [BPTC]    = markflag(MAC),
1278
1279    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1280    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1281    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1282    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1283    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1284    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1285    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1286    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1287    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1288    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1289    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
1290};
1291
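/*
 * MMIO accesses are decoded as 32-bit register indexes (addr >> 2) into
 * the macreg_writeops[]/macreg_readops[] tables.  Registers tagged
 * MAC_ACCESS_FLAG_NEEDED in mac_reg_access[] are reachable only when the
 * matching compat flag is enabled, and accesses to partially implemented
 * registers are reported through DBGOUT(GENERAL).
 */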
1292static void
1293e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1294                 unsigned size)
1295{
1296    E1000State *s = opaque;
1297    unsigned int index = (addr & 0x1ffff) >> 2;
1298
1299    if (index < NWRITEOPS && macreg_writeops[index]) {
1300        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1301            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1302            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1303                DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1304                       "It is not fully implemented.\n", index<<2);
1305            }
1306            macreg_writeops[index](s, index, val);
1307        } else {    /* "flag needed" bit is set, but the flag is not active */
1308            DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1309                   index<<2);
1310        }
1311    } else if (index < NREADOPS && macreg_readops[index]) {
1312        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1313               index<<2, val);
1314    } else {
1315        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1316               index<<2, val);
1317    }
1318}
1319
1320static uint64_t
1321e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1322{
1323    E1000State *s = opaque;
1324    unsigned int index = (addr & 0x1ffff) >> 2;
1325
1326    if (index < NREADOPS && macreg_readops[index]) {
1327        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1328            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1329            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1330                DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1331                       "It is not fully implemented.\n", index<<2);
1332            }
1333            return macreg_readops[index](s, index);
1334        } else {    /* "flag needed" bit is set, but the flag is not active */
1335            DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1336                   index<<2);
1337        }
1338    } else {
1339        DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1340    }
1341    return 0;
1342}
1343
1344static const MemoryRegionOps e1000_mmio_ops = {
1345    .read = e1000_mmio_read,
1346    .write = e1000_mmio_write,
1347    .endianness = DEVICE_LITTLE_ENDIAN,
1348    .impl = {
1349        .min_access_size = 4,
1350        .max_access_size = 4,
1351    },
1352};
1353
1354static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1355                              unsigned size)
1356{
1357    E1000State *s = opaque;
1358
1359    (void)s;
1360    return 0;
1361}
1362
1363static void e1000_io_write(void *opaque, hwaddr addr,
1364                           uint64_t val, unsigned size)
1365{
1366    E1000State *s = opaque;
1367
1368    (void)s;
1369}
1370
1371static const MemoryRegionOps e1000_io_ops = {
1372    .read = e1000_io_read,
1373    .write = e1000_io_write,
1374    .endianness = DEVICE_LITTLE_ENDIAN,
1375};
1376
1377static bool is_version_1(void *opaque, int version_id)
1378{
1379    return version_id == 1;
1380}
1381
1382static int e1000_pre_save(void *opaque)
1383{
1384    E1000State *s = opaque;
1385    NetClientState *nc = qemu_get_queue(s->nic);
1386
1387    /*
1388     * If link is down and auto-negotiation is supported and ongoing,
1389     * complete auto-negotiation immediately. This allows us to look
1390     * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1391     */
1392    if (nc->link_down && have_autoneg(s)) {
1393        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1394    }
1395
1396    /* Decide which set of props to migrate in the main structure */
1397    if (chkflag(TSO) || !s->use_tso_for_migration) {
1398        /* Either we're migrating with the extra subsection, in which
 1399         * case mig_props is always 'props', OR
 1400         * we don't have the subsection, and 'props' was the last
 1401         * one updated.
1402         */
1403        s->mig_props = s->tx.props;
1404    } else {
1405        /* We're not using the subsection, and 'tso_props' was
1406         * the last updated.
1407         */
1408        s->mig_props = s->tx.tso_props;
1409    }
1410    return 0;
1411}
1412
1413static int e1000_post_load(void *opaque, int version_id)
1414{
1415    E1000State *s = opaque;
1416    NetClientState *nc = qemu_get_queue(s->nic);
1417
1418    if (!chkflag(MIT)) {
1419        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1420            s->mac_reg[TADV] = 0;
1421        s->mit_irq_level = false;
1422    }
1423    s->mit_ide = 0;
1424    s->mit_timer_on = true;
1425    timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
1426
1427    /* nc.link_down can't be migrated, so infer link_down according
1428     * to link status bit in mac_reg[STATUS].
1429     * Alternatively, restart link negotiation if it was in progress. */
1430    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1431
1432    if (have_autoneg(s) &&
1433        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1434        nc->link_down = false;
1435        timer_mod(s->autoneg_timer,
1436                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1437    }
1438
1439    s->tx.props = s->mig_props;
1440    if (!s->received_tx_tso) {
1441        /* We received only one set of offload data (tx.props)
 1442         * and don't have tx.tso_props.  The best we can do
 1443         * is duplicate the data.
1444         */
1445        s->tx.tso_props = s->mig_props;
1446    }
1447    return 0;
1448}
1449
1450static int e1000_tx_tso_post_load(void *opaque, int version_id)
1451{
1452    E1000State *s = opaque;
1453    s->received_tx_tso = true;
1454    return 0;
1455}
1456
1457static bool e1000_mit_state_needed(void *opaque)
1458{
1459    E1000State *s = opaque;
1460
1461    return chkflag(MIT);
1462}
1463
1464static bool e1000_full_mac_needed(void *opaque)
1465{
1466    E1000State *s = opaque;
1467
1468    return chkflag(MAC);
1469}
1470
1471static bool e1000_tso_state_needed(void *opaque)
1472{
1473    E1000State *s = opaque;
1474
1475    return chkflag(TSO);
1476}
1477
1478static const VMStateDescription vmstate_e1000_mit_state = {
1479    .name = "e1000/mit_state",
1480    .version_id = 1,
1481    .minimum_version_id = 1,
1482    .needed = e1000_mit_state_needed,
1483    .fields = (VMStateField[]) {
1484        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1485        VMSTATE_UINT32(mac_reg[RADV], E1000State),
1486        VMSTATE_UINT32(mac_reg[TADV], E1000State),
1487        VMSTATE_UINT32(mac_reg[ITR], E1000State),
1488        VMSTATE_BOOL(mit_irq_level, E1000State),
1489        VMSTATE_END_OF_LIST()
1490    }
1491};
1492
1493static const VMStateDescription vmstate_e1000_full_mac_state = {
1494    .name = "e1000/full_mac_state",
1495    .version_id = 1,
1496    .minimum_version_id = 1,
1497    .needed = e1000_full_mac_needed,
1498    .fields = (VMStateField[]) {
1499        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
1500        VMSTATE_END_OF_LIST()
1501    }
1502};
1503
1504static const VMStateDescription vmstate_e1000_tx_tso_state = {
1505    .name = "e1000/tx_tso_state",
1506    .version_id = 1,
1507    .minimum_version_id = 1,
1508    .needed = e1000_tso_state_needed,
1509    .post_load = e1000_tx_tso_post_load,
1510    .fields = (VMStateField[]) {
1511        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
1512        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
1513        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
1514        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
1515        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
1516        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
1517        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
1518        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
1519        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
1520        VMSTATE_INT8(tx.tso_props.ip, E1000State),
1521        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
1522        VMSTATE_END_OF_LIST()
1523    }
1524};
1525
1526static const VMStateDescription vmstate_e1000 = {
1527    .name = "e1000",
1528    .version_id = 2,
1529    .minimum_version_id = 1,
1530    .pre_save = e1000_pre_save,
1531    .post_load = e1000_post_load,
1532    .fields = (VMStateField[]) {
1533        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1534        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1535        VMSTATE_UNUSED(4), /* Was mmio_base.  */
1536        VMSTATE_UINT32(rxbuf_size, E1000State),
1537        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1538        VMSTATE_UINT32(eecd_state.val_in, E1000State),
1539        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1540        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1541        VMSTATE_UINT16(eecd_state.reading, E1000State),
1542        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1543        VMSTATE_UINT8(mig_props.ipcss, E1000State),
1544        VMSTATE_UINT8(mig_props.ipcso, E1000State),
1545        VMSTATE_UINT16(mig_props.ipcse, E1000State),
1546        VMSTATE_UINT8(mig_props.tucss, E1000State),
1547        VMSTATE_UINT8(mig_props.tucso, E1000State),
1548        VMSTATE_UINT16(mig_props.tucse, E1000State),
1549        VMSTATE_UINT32(mig_props.paylen, E1000State),
1550        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
1551        VMSTATE_UINT16(mig_props.mss, E1000State),
1552        VMSTATE_UINT16(tx.size, E1000State),
1553        VMSTATE_UINT16(tx.tso_frames, E1000State),
1554        VMSTATE_UINT8(tx.sum_needed, E1000State),
1555        VMSTATE_INT8(mig_props.ip, E1000State),
1556        VMSTATE_INT8(mig_props.tcp, E1000State),
1557        VMSTATE_BUFFER(tx.header, E1000State),
1558        VMSTATE_BUFFER(tx.data, E1000State),
1559        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1560        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1561        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1562        VMSTATE_UINT32(mac_reg[EECD], E1000State),
1563        VMSTATE_UINT32(mac_reg[EERD], E1000State),
1564        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1565        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1566        VMSTATE_UINT32(mac_reg[ICR], E1000State),
1567        VMSTATE_UINT32(mac_reg[ICS], E1000State),
1568        VMSTATE_UINT32(mac_reg[IMC], E1000State),
1569        VMSTATE_UINT32(mac_reg[IMS], E1000State),
1570        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1571        VMSTATE_UINT32(mac_reg[MANC], E1000State),
1572        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1573        VMSTATE_UINT32(mac_reg[MPC], E1000State),
1574        VMSTATE_UINT32(mac_reg[PBA], E1000State),
1575        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1576        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1577        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1578        VMSTATE_UINT32(mac_reg[RDH], E1000State),
1579        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1580        VMSTATE_UINT32(mac_reg[RDT], E1000State),
1581        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1582        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1583        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1584        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1585        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1586        VMSTATE_UINT32(mac_reg[TDH], E1000State),
1587        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1588        VMSTATE_UINT32(mac_reg[TDT], E1000State),
1589        VMSTATE_UINT32(mac_reg[TORH], E1000State),
1590        VMSTATE_UINT32(mac_reg[TORL], E1000State),
1591        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1592        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1593        VMSTATE_UINT32(mac_reg[TPR], E1000State),
1594        VMSTATE_UINT32(mac_reg[TPT], E1000State),
1595        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1596        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1597        VMSTATE_UINT32(mac_reg[VET], E1000State),
1598        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1599        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1600        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1601        VMSTATE_END_OF_LIST()
1602    },
1603    .subsections = (const VMStateDescription*[]) {
1604        &vmstate_e1000_mit_state,
1605        &vmstate_e1000_full_mac_state,
1606        &vmstate_e1000_tx_tso_state,
1607        NULL
1608    }
1609};
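    /*
     * The fields of the main section above are serialized in declaration
     * order with no names on the wire, so existing entries must keep their
     * order and types; new device state is normally added as a subsection
     * gated by a .needed callback, as done above, to keep migration to and
     * from older QEMU versions working.
     */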
1610
1611/*
1612 * EEPROM contents documented in Tables 5-2 and 5-3 of the 8254x SDM, pp. 98-102.
1613 * Note: A valid DevId will be inserted during pci_e1000_realize().
1614 */
1615static const uint16_t e1000_eeprom_template[64] = {
1616    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1617    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1618    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1619    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1620    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1621    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1622    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1623    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1624};
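    /*
     * pci_e1000_realize() passes this template to e1000x_core_prepare_eeprom()
     * (e1000x_common.c), which fills in the MAC address in words 0-2 and the
     * PCI device ID in the two DevId slots, then sets word 63 so that all 64
     * words sum to 0xBABA, the checksum value guest drivers expect.
     */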
1625
1626/* PCI interface */
1627
1628static void
1629e1000_mmio_setup(E1000State *d)
1630{
1631    int i;
1632    const uint32_t excluded_regs[] = {
1633        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1634        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1635    };
1636
1637    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1638                          "e1000-mmio", PNPMMIO_SIZE);
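        /*
         * Coalesce MMIO writes everywhere except around the registers listed
         * in excluded_regs: those have side effects (interrupt cause/mask,
         * MDI access, transmit control and tail) and must be handled as soon
         * as the guest writes them.  PNPMMIO_SIZE terminates the list.
         */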
1639    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1640    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1641        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1642                                     excluded_regs[i+1] - excluded_regs[i] - 4);
1643    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d,
                              "e1000-io", IOPORT_SIZE);
1644}
1645
1646static void
1647pci_e1000_uninit(PCIDevice *dev)
1648{
1649    E1000State *d = E1000(dev);
1650
1651    timer_del(d->autoneg_timer);
1652    timer_free(d->autoneg_timer);
1653    timer_del(d->mit_timer);
1654    timer_free(d->mit_timer);
1655    timer_del(d->flush_queue_timer);
1656    timer_free(d->flush_queue_timer);
1657    qemu_del_nic(d->nic);
1658}
1659
1660static NetClientInfo net_e1000_info = {
1661    .type = NET_CLIENT_DRIVER_NIC,
1662    .size = sizeof(NICState),
1663    .can_receive = e1000_can_receive,
1664    .receive = e1000_receive,
1665    .receive_iov = e1000_receive_iov,
1666    .link_status_changed = e1000_set_link_status,
1667};
1668
1669static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1670                                uint32_t val, int len)
1671{
1672    E1000State *s = E1000(pci_dev);
1673
1674    pci_default_write_config(pci_dev, address, val, len);
1675
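        /*
         * While bus mastering is disabled the device cannot DMA received
         * frames into guest memory, so the net layer queues them; flush
         * that queue as soon as the guest sets PCI_COMMAND_MASTER.
         */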
1676    if (range_covers_byte(address, len, PCI_COMMAND) &&
1677        (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1678        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1679    }
1680}
1681
1682static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1683{
1684    DeviceState *dev = DEVICE(pci_dev);
1685    E1000State *d = E1000(pci_dev);
1686    uint8_t *pci_conf;
1687    uint8_t *macaddr;
1688
1689    pci_dev->config_write = e1000_write_config;
1690
1691    pci_conf = pci_dev->config;
1692
1693    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1694    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1695
1696    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1697
1698    e1000_mmio_setup(d);
1699
1700    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1701
1702    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1703
1704    qemu_macaddr_default_if_unset(&d->conf.macaddr);
1705    macaddr = d->conf.macaddr.a;
1706
1707    e1000x_core_prepare_eeprom(d->eeprom_data,
1708                               e1000_eeprom_template,
1709                               sizeof(e1000_eeprom_template),
1710                               PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
1711                               macaddr);
1712
1713    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1714                          object_get_typename(OBJECT(d)), dev->id, d);
1715
1716    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1717
1718    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1719    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1720    d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1721                                        e1000_flush_queue_timer, d);
1722}
1723
1724static void qdev_e1000_reset(DeviceState *dev)
1725{
1726    E1000State *d = E1000(dev);
1727    e1000_reset(d);
1728}
1729
1730static Property e1000_properties[] = {
1731    DEFINE_NIC_PROPERTIES(E1000State, conf),
1732    DEFINE_PROP_BIT("autonegotiation", E1000State,
1733                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1734    DEFINE_PROP_BIT("mitigation", E1000State,
1735                    compat_flags, E1000_FLAG_MIT_BIT, true),
1736    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1737                    compat_flags, E1000_FLAG_MAC_BIT, true),
1738    DEFINE_PROP_BIT("migrate_tso_props", E1000State,
1739                    compat_flags, E1000_FLAG_TSO_BIT, true),
1740    DEFINE_PROP_END_OF_LIST(),
1741};
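    /*
     * These become -device options; for example (a hypothetical command
     * line, shown only to illustrate the property names above):
     *   -device e1000,netdev=net0,mac=52:54:00:12:34:56,mitigation=off
     * with a matching -netdev backend whose id is net0.
     */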
1742
1743typedef struct E1000Info {
1744    const char *name;
1745    uint16_t   device_id;
1746    uint8_t    revision;
1747    uint16_t   phy_id2;
1748} E1000Info;
1749
1750static void e1000_class_init(ObjectClass *klass, void *data)
1751{
1752    DeviceClass *dc = DEVICE_CLASS(klass);
1753    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1754    E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1755    const E1000Info *info = data;
1756
1757    k->realize = pci_e1000_realize;
1758    k->exit = pci_e1000_uninit;
1759    k->romfile = "efi-e1000.rom";
1760    k->vendor_id = PCI_VENDOR_ID_INTEL;
1761    k->device_id = info->device_id;
1762    k->revision = info->revision;
1763    e->phy_id2 = info->phy_id2;
1764    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1765    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1766    dc->desc = "Intel Gigabit Ethernet";
1767    dc->reset = qdev_e1000_reset;
1768    dc->vmsd = &vmstate_e1000;
1769    device_class_set_props(dc, e1000_properties);
1770}
1771
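    /*
     * Expose a "bootindex" property on every instance; when the user sets
     * it, the NIC is added to the firmware boot order with "/ethernet-phy@0"
     * appended to the device's firmware path.
     */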
1772static void e1000_instance_init(Object *obj)
1773{
1774    E1000State *n = E1000(obj);
1775    device_add_bootindex_property(obj, &n->conf.bootindex,
1776                                  "bootindex", "/ethernet-phy@0",
1777                                  DEVICE(n));
1778}
1779
1780static const TypeInfo e1000_base_info = {
1781    .name          = TYPE_E1000_BASE,
1782    .parent        = TYPE_PCI_DEVICE,
1783    .instance_size = sizeof(E1000State),
1784    .instance_init = e1000_instance_init,
1785    .class_size    = sizeof(E1000BaseClass),
1786    .abstract      = true,
1787    .interfaces = (InterfaceInfo[]) {
1788        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1789        { },
1790    },
1791};
1792
1793static const E1000Info e1000_devices[] = {
1794    {
1795        .name      = "e1000",
1796        .device_id = E1000_DEV_ID_82540EM,
1797        .revision  = 0x03,
1798        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1799    },
1800    {
1801        .name      = "e1000-82544gc",
1802        .device_id = E1000_DEV_ID_82544GC_COPPER,
1803        .revision  = 0x03,
1804        .phy_id2   = E1000_PHY_ID2_82544x,
1805    },
1806    {
1807        .name      = "e1000-82545em",
1808        .device_id = E1000_DEV_ID_82545EM_COPPER,
1809        .revision  = 0x03,
1810        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1811    },
1812};
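    /*
     * Registered below as the user-visible "e1000", "e1000-82544gc" and
     * "e1000-82545em" device types; each shares TYPE_E1000_BASE and picks up
     * its PCI IDs and PHY ID2 from this table via class_data.
     */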
1813
1814static void e1000_register_types(void)
1815{
1816    int i;
1817
1818    type_register_static(&e1000_base_info);
1819    for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1820        const E1000Info *info = &e1000_devices[i];
1821        TypeInfo type_info = {};
1822
1823        type_info.name = info->name;
1824        type_info.parent = TYPE_E1000_BASE;
1825        type_info.class_data = (void *)info;
1826        type_info.class_init = e1000_class_init;
1827
1828        type_register(&type_info);
1829    }
1830}
1831
1832type_init(e1000_register_types)
1833