qemu/hw/net/e1000.c
   1/*
   2 * QEMU e1000 emulation
   3 *
   4 * Software developer's manual:
   5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
   6 *
   7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
   8 * Copyright (c) 2008 Qumranet
   9 * Based on work done by:
  10 * Copyright (c) 2007 Dan Aloni
  11 * Copyright (c) 2004 Antony T Curtis
  12 *
  13 * This library is free software; you can redistribute it and/or
  14 * modify it under the terms of the GNU Lesser General Public
  15 * License as published by the Free Software Foundation; either
  16 * version 2 of the License, or (at your option) any later version.
  17 *
  18 * This library is distributed in the hope that it will be useful,
  19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 * Lesser General Public License for more details.
  22 *
  23 * You should have received a copy of the GNU Lesser General Public
  24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  25 */
  26
  27
  28#include "hw/hw.h"
  29#include "hw/pci/pci.h"
  30#include "net/net.h"
  31#include "net/checksum.h"
  32#include "hw/loader.h"
  33#include "sysemu/sysemu.h"
  34#include "sysemu/dma.h"
  35#include "qemu/iov.h"
  36#include "qemu/range.h"
  37
  38#include "e1000_regs.h"
  39
  40static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
  41
  42#define E1000_DEBUG
  43
  44#ifdef E1000_DEBUG
  45enum {
  46    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
  47    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
  48    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
  49    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
  50};
  51#define DBGBIT(x)    (1<<DEBUG_##x)
  52static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
  53
  54#define DBGOUT(what, fmt, ...) do { \
  55    if (debugflags & DBGBIT(what)) \
  56        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
  57    } while (0)
  58#else
  59#define DBGOUT(what, fmt, ...) do {} while (0)
  60#endif
  61
  62#define IOPORT_SIZE       0x40
  63#define PNPMMIO_SIZE      0x20000
  64#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
  65
  66/* this is the size past which hardware will drop packets when setting LPE=0 */
  67#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
  68/* this is the size past which hardware will drop packets when setting LPE=1 */
  69#define MAXIMUM_ETHERNET_LPE_SIZE 16384
  70
  71#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
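/* 1522 = 1500-byte payload + 14-byte Ethernet header + 4-byte 802.1Q tag
 * + 4-byte FCS; MAXIMUM_ETHERNET_HDR_LEN likewise covers the Ethernet
 * header plus an optional VLAN tag. */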
  72
  73/*
  74 * HW models:
  75 *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
  76 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
  77 *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
  78 *  Others never tested
  79 */
  80
  81typedef struct E1000State_st {
  82    /*< private >*/
  83    PCIDevice parent_obj;
  84    /*< public >*/
  85
  86    NICState *nic;
  87    NICConf conf;
  88    MemoryRegion mmio;
  89    MemoryRegion io;
  90
  91    uint32_t mac_reg[0x8000];
  92    uint16_t phy_reg[0x20];
  93    uint16_t eeprom_data[64];
  94
  95    uint32_t rxbuf_size;
  96    uint32_t rxbuf_min_shift;
  97    struct e1000_tx {
  98        unsigned char header[256];
  99        unsigned char vlan_header[4];
 100        /* Fields vlan and data must not be reordered or separated. */
 101        unsigned char vlan[4];
 102        unsigned char data[0x10000];
 103        uint16_t size;
 104        unsigned char sum_needed;
 105        unsigned char vlan_needed;
 106        uint8_t ipcss;
 107        uint8_t ipcso;
 108        uint16_t ipcse;
 109        uint8_t tucss;
 110        uint8_t tucso;
 111        uint16_t tucse;
 112        uint8_t hdr_len;
 113        uint16_t mss;
 114        uint32_t paylen;
 115        uint16_t tso_frames;
 116        char tse;
 117        int8_t ip;
 118        int8_t tcp;
 119        char cptse;     // current packet tse bit
 120    } tx;
 121
 122    struct {
 123        uint32_t val_in;    /* shifted in from guest driver */
 124        uint16_t bitnum_in;
 125        uint16_t bitnum_out;
 126        uint16_t reading;
 127        uint32_t old_eecd;
 128    } eecd_state;
 129
 130    QEMUTimer *autoneg_timer;
 131
 132    QEMUTimer *mit_timer;      /* Mitigation timer. */
 133    bool mit_timer_on;         /* Mitigation timer is running. */
 134    bool mit_irq_level;        /* Tracks interrupt pin level. */
 135    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
 136
 137/* Compatibility flags for migration to/from qemu 1.3.0 and older */
 138#define E1000_FLAG_AUTONEG_BIT 0
 139#define E1000_FLAG_MIT_BIT 1
 140#define E1000_FLAG_MAC_BIT 2
 141#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
 142#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
 143#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
 144    uint32_t compat_flags;
 145} E1000State;
 146
 147#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
 148
 149typedef struct E1000BaseClass {
 150    PCIDeviceClass parent_class;
 151    uint16_t phy_id2;
 152} E1000BaseClass;
 153
 154#define TYPE_E1000_BASE "e1000-base"
 155
 156#define E1000(obj) \
 157    OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)
 158
 159#define E1000_DEVICE_CLASS(klass) \
 160     OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
 161#define E1000_DEVICE_GET_CLASS(obj) \
 162    OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
 163
 164#define defreg(x)    x = (E1000_##x>>2)
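/* defreg() turns a register's byte offset from e1000_regs.h into its index
 * into mac_reg[], which holds one 32-bit word per register: for example
 * E1000_STATUS (offset 0x8) becomes index 2. */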
 165enum {
 166    defreg(CTRL),    defreg(EECD),    defreg(EERD),    defreg(GPRC),
 167    defreg(GPTC),    defreg(ICR),     defreg(ICS),     defreg(IMC),
 168    defreg(IMS),     defreg(LEDCTL),  defreg(MANC),    defreg(MDIC),
 169    defreg(MPC),     defreg(PBA),     defreg(RCTL),    defreg(RDBAH),
 170    defreg(RDBAL),   defreg(RDH),     defreg(RDLEN),   defreg(RDT),
 171    defreg(STATUS),  defreg(SWSM),    defreg(TCTL),    defreg(TDBAH),
 172    defreg(TDBAL),   defreg(TDH),     defreg(TDLEN),   defreg(TDT),
 173    defreg(TORH),    defreg(TORL),    defreg(TOTH),    defreg(TOTL),
 174    defreg(TPR),     defreg(TPT),     defreg(TXDCTL),  defreg(WUFC),
 175    defreg(RA),      defreg(MTA),     defreg(CRCERRS), defreg(VFTA),
 176    defreg(VET),     defreg(RDTR),    defreg(RADV),    defreg(TADV),
 177    defreg(ITR),     defreg(FCRUC),   defreg(TDFH),    defreg(TDFT),
 178    defreg(TDFHS),   defreg(TDFTS),   defreg(TDFPC),   defreg(RDFH),
 179    defreg(RDFT),    defreg(RDFHS),   defreg(RDFTS),   defreg(RDFPC),
 180    defreg(IPAV),    defreg(WUC),     defreg(WUS),     defreg(AIT),
 181    defreg(IP6AT),   defreg(IP4AT),   defreg(FFLT),    defreg(FFMT),
 182    defreg(FFVT),    defreg(WUPM),    defreg(PBM),     defreg(SCC),
 183    defreg(ECOL),    defreg(MCC),     defreg(LATECOL), defreg(COLC),
 184    defreg(DC),      defreg(TNCRS),   defreg(SEC),     defreg(CEXTERR),
 185    defreg(RLEC),    defreg(XONRXC),  defreg(XONTXC),  defreg(XOFFRXC),
 186    defreg(XOFFTXC), defreg(RFC),     defreg(RJC),     defreg(RNBC),
 187    defreg(TSCTFC),  defreg(MGTPRC),  defreg(MGTPDC),  defreg(MGTPTC),
 188    defreg(RUC),     defreg(ROC),     defreg(GORCL),   defreg(GORCH),
 189    defreg(GOTCL),   defreg(GOTCH),   defreg(BPRC),    defreg(MPRC),
 190    defreg(TSCTC),   defreg(PRC64),   defreg(PRC127),  defreg(PRC255),
 191    defreg(PRC511),  defreg(PRC1023), defreg(PRC1522), defreg(PTC64),
 192    defreg(PTC127),  defreg(PTC255),  defreg(PTC511),  defreg(PTC1023),
 193    defreg(PTC1522), defreg(MPTC),    defreg(BPTC)
 194};
 195
 196static void
 197e1000_link_down(E1000State *s)
 198{
 199    s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
 200    s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
 201    s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
 202    s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK;
 203}
 204
 205static void
 206e1000_link_up(E1000State *s)
 207{
 208    s->mac_reg[STATUS] |= E1000_STATUS_LU;
 209    s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
 210
 211    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 212    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 213}
 214
 215static bool
 216have_autoneg(E1000State *s)
 217{
 218    return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
 219}
 220
 221static void
 222set_phy_ctrl(E1000State *s, int index, uint16_t val)
 223{
 224    /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
 225    s->phy_reg[PHY_CTRL] = val & ~(0x3f |
 226                                   MII_CR_RESET |
 227                                   MII_CR_RESTART_AUTO_NEG);
 228
 229    /*
 230     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
 231     * migrate during auto negotiation, after migration the link will be
 232     * down.
 233     */
 234    if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
 235        e1000_link_down(s);
 236        DBGOUT(PHY, "Start link auto negotiation\n");
 237        timer_mod(s->autoneg_timer,
 238                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
 239    }
 240}
 241
 242static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
 243    [PHY_CTRL] = set_phy_ctrl,
 244};
 245
 246enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
 247
 248enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
 249static const char phy_regcap[0x20] = {
 250    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
 251    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
 252    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
 253    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
 254    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
 255    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
 256    [PHY_AUTONEG_EXP] = PHY_R,
 257};
 258
  259/* PHY_ID2 documented in 8254x_GBe_SDM.pdf, p. 250 */
 260static const uint16_t phy_reg_init[] = {
 261    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
 262                   MII_CR_FULL_DUPLEX |
 263                   MII_CR_AUTO_NEG_EN,
 264
 265    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
 266                   MII_SR_LINK_STATUS |   /* link initially up */
 267                   MII_SR_AUTONEG_CAPS |
 268                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
 269                   MII_SR_PREAMBLE_SUPPRESS |
 270                   MII_SR_EXTENDED_STATUS |
 271                   MII_SR_10T_HD_CAPS |
 272                   MII_SR_10T_FD_CAPS |
 273                   MII_SR_100X_HD_CAPS |
 274                   MII_SR_100X_FD_CAPS,
 275
 276    [PHY_ID1] = 0x141,
 277    /* [PHY_ID2] configured per DevId, from e1000_reset() */
 278    [PHY_AUTONEG_ADV] = 0xde1,
 279    [PHY_LP_ABILITY] = 0x1e0,
 280    [PHY_1000T_CTRL] = 0x0e00,
 281    [PHY_1000T_STATUS] = 0x3c00,
 282    [M88E1000_PHY_SPEC_CTRL] = 0x360,
 283    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
 284    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
 285};
 286
 287static const uint32_t mac_reg_init[] = {
 288    [PBA]     = 0x00100030,
 289    [LEDCTL]  = 0x602,
 290    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
 291                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
 292    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
 293                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
 294                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
 295                E1000_STATUS_LU,
 296    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
 297                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
 298                E1000_MANC_RMCP_EN,
 299};
 300
  301/* Helper function: *curr == 0 means the value is not set */
 302static inline void
 303mit_update_delay(uint32_t *curr, uint32_t value)
 304{
 305    if (value && (*curr == 0 || value < *curr)) {
 306        *curr = value;
 307    }
 308}
 309
 310static void
 311set_interrupt_cause(E1000State *s, int index, uint32_t val)
 312{
 313    PCIDevice *d = PCI_DEVICE(s);
 314    uint32_t pending_ints;
 315    uint32_t mit_delay;
 316
 317    s->mac_reg[ICR] = val;
 318
 319    /*
 320     * Make sure ICR and ICS registers have the same value.
 321     * The spec says that the ICS register is write-only.  However in practice,
 322     * on real hardware ICS is readable, and for reads it has the same value as
 323     * ICR (except that ICS does not have the clear on read behaviour of ICR).
 324     *
 325     * The VxWorks PRO/1000 driver uses this behaviour.
 326     */
 327    s->mac_reg[ICS] = val;
 328
 329    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
 330    if (!s->mit_irq_level && pending_ints) {
 331        /*
  332         * Here we detect a potential rising edge. We postpone raising the
 333         * interrupt line if we are inside the mitigation delay window
 334         * (s->mit_timer_on == 1).
 335         * We provide a partial implementation of interrupt mitigation,
 336         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
 337         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
 338         * RADV; relative timers based on TIDV and RDTR are not implemented.
 339         */
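        /*
         * For example, with the reset default of ITR == 250 programmed in
         * e1000_reset() below, the window is 250 * 256 ns = 64 us, i.e. at
         * most roughly 15,600 interrupts per second; RADV and TADV work the
         * same way but in 1024 ns units.
         */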
 340        if (s->mit_timer_on) {
 341            return;
 342        }
 343        if (chkflag(MIT)) {
 344            /* Compute the next mitigation delay according to pending
 345             * interrupts and the current values of RADV (provided
 346             * RDTR!=0), TADV and ITR.
 347             * Then rearm the timer.
 348             */
 349            mit_delay = 0;
 350            if (s->mit_ide &&
 351                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
 352                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
 353            }
 354            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
 355                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
 356            }
 357            mit_update_delay(&mit_delay, s->mac_reg[ITR]);
 358
 359            if (mit_delay) {
 360                s->mit_timer_on = 1;
 361                timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 362                          mit_delay * 256);
 363            }
 364            s->mit_ide = 0;
 365        }
 366    }
 367
 368    s->mit_irq_level = (pending_ints != 0);
 369    pci_set_irq(d, s->mit_irq_level);
 370}
 371
 372static void
 373e1000_mit_timer(void *opaque)
 374{
 375    E1000State *s = opaque;
 376
 377    s->mit_timer_on = 0;
 378    /* Call set_interrupt_cause to update the irq level (if necessary). */
 379    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
 380}
 381
 382static void
 383set_ics(E1000State *s, int index, uint32_t val)
 384{
 385    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
 386        s->mac_reg[IMS]);
 387    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
 388}
 389
 390static void
 391e1000_autoneg_timer(void *opaque)
 392{
 393    E1000State *s = opaque;
 394    if (!qemu_get_queue(s->nic)->link_down) {
 395        e1000_link_up(s);
 396        s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK;
 397        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
 398        DBGOUT(PHY, "Auto negotiation is completed\n");
 399        set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
 400    }
 401}
 402
 403static int
 404rxbufsize(uint32_t v)
 405{
 406    v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
 407         E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
 408         E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
 409    switch (v) {
 410    case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
 411        return 16384;
 412    case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
 413        return 8192;
 414    case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
 415        return 4096;
 416    case E1000_RCTL_SZ_1024:
 417        return 1024;
 418    case E1000_RCTL_SZ_512:
 419        return 512;
 420    case E1000_RCTL_SZ_256:
 421        return 256;
 422    }
 423    return 2048;
 424}
 425
 426static void e1000_reset(void *opaque)
 427{
 428    E1000State *d = opaque;
 429    E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
 430    uint8_t *macaddr = d->conf.macaddr.a;
 431    int i;
 432
 433    timer_del(d->autoneg_timer);
 434    timer_del(d->mit_timer);
 435    d->mit_timer_on = 0;
 436    d->mit_irq_level = 0;
 437    d->mit_ide = 0;
 438    memset(d->phy_reg, 0, sizeof d->phy_reg);
 439    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
 440    d->phy_reg[PHY_ID2] = edc->phy_id2;
 441    memset(d->mac_reg, 0, sizeof d->mac_reg);
 442    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
 443    d->rxbuf_min_shift = 1;
 444    memset(&d->tx, 0, sizeof d->tx);
 445
 446    if (qemu_get_queue(d->nic)->link_down) {
 447        e1000_link_down(d);
 448    }
 449
  450    /* Throttle interrupts to prevent the guest (e.g. Win 2012) from
  451     * reinjecting interrupts endlessly. TODO: fix the non-ITR case.
 452     */
 453    d->mac_reg[ITR] = 250;
 454
 455    /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
 456    d->mac_reg[RA] = 0;
 457    d->mac_reg[RA + 1] = E1000_RAH_AV;
 458    for (i = 0; i < 4; i++) {
 459        d->mac_reg[RA] |= macaddr[i] << (8 * i);
 460        d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
 461    }
 462    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
 463}
 464
 465static void
 466set_ctrl(E1000State *s, int index, uint32_t val)
 467{
 468    /* RST is self clearing */
 469    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
 470}
 471
 472static void
 473set_rx_control(E1000State *s, int index, uint32_t val)
 474{
 475    s->mac_reg[RCTL] = val;
 476    s->rxbuf_size = rxbufsize(val);
 477    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
 478    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
 479           s->mac_reg[RCTL]);
 480    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 481}
 482
 483static void
 484set_mdic(E1000State *s, int index, uint32_t val)
 485{
 486    uint32_t data = val & E1000_MDIC_DATA_MASK;
 487    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
 488
 489    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
 490        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
 491    else if (val & E1000_MDIC_OP_READ) {
 492        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
 493        if (!(phy_regcap[addr] & PHY_R)) {
 494            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
 495            val |= E1000_MDIC_ERROR;
 496        } else
 497            val = (val ^ data) | s->phy_reg[addr];
 498    } else if (val & E1000_MDIC_OP_WRITE) {
 499        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
 500        if (!(phy_regcap[addr] & PHY_W)) {
 501            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
 502            val |= E1000_MDIC_ERROR;
 503        } else {
 504            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
 505                phyreg_writeops[addr](s, index, data);
 506            } else {
 507                s->phy_reg[addr] = data;
 508            }
 509        }
 510    }
 511    s->mac_reg[MDIC] = val | E1000_MDIC_READY;
 512
 513    if (val & E1000_MDIC_INT_EN) {
 514        set_ics(s, 0, E1000_ICR_MDAC);
 515    }
 516}
 517
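/*
 * get_eecd()/set_eecd() below bit-bang the Microwire EEPROM protocol: the
 * guest clocks nine bits in on EECD.DI (a start bit, a 2-bit opcode and a
 * 6-bit word address); if the top three bits match
 * EEPROM_READ_OPCODE_MICROWIRE, the selected eeprom_data[] word is then
 * clocked out on EECD.DO MSB-first, advancing one bit per SK cycle.
 */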
 518static uint32_t
 519get_eecd(E1000State *s, int index)
 520{
 521    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
 522
 523    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
 524           s->eecd_state.bitnum_out, s->eecd_state.reading);
 525    if (!s->eecd_state.reading ||
 526        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
 527          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
 528        ret |= E1000_EECD_DO;
 529    return ret;
 530}
 531
 532static void
 533set_eecd(E1000State *s, int index, uint32_t val)
 534{
 535    uint32_t oldval = s->eecd_state.old_eecd;
 536
 537    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
 538            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
 539    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
 540        return;
 541    }
 542    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
 543        s->eecd_state.val_in = 0;
 544        s->eecd_state.bitnum_in = 0;
 545        s->eecd_state.bitnum_out = 0;
 546        s->eecd_state.reading = 0;
 547    }
 548    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
 549        return;
 550    }
 551    if (!(E1000_EECD_SK & val)) {               /* falling edge */
 552        s->eecd_state.bitnum_out++;
 553        return;
 554    }
 555    s->eecd_state.val_in <<= 1;
 556    if (val & E1000_EECD_DI)
 557        s->eecd_state.val_in |= 1;
 558    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
 559        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
 560        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
 561            EEPROM_READ_OPCODE_MICROWIRE);
 562    }
 563    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
 564           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
 565           s->eecd_state.reading);
 566}
 567
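/*
 * flash_eerd_read() models the EERD "EEPROM read" register: the guest writes
 * the word address (shifted up by E1000_EEPROM_RW_ADDR_SHIFT) together with
 * the START bit, then reads the register back until the DONE bit is set, with
 * the EEPROM word returned in the data field at E1000_EEPROM_RW_REG_DATA.
 * In this model the read completes immediately.
 */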
 568static uint32_t
 569flash_eerd_read(E1000State *s, int x)
 570{
 571    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
 572
 573    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
 574        return (s->mac_reg[EERD]);
 575
 576    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
 577        return (E1000_EEPROM_RW_REG_DONE | r);
 578
 579    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
 580           E1000_EEPROM_RW_REG_DONE | r);
 581}
 582
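/*
 * putsum(): compute an IP-style checksum over data[css..cse] (cse == 0 means
 * "to the end of the packet", i.e. n bytes) and store the 16-bit result
 * big-endian at offset sloc.
 */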
 583static void
 584putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
 585{
 586    uint32_t sum;
 587
 588    if (cse && cse < n)
 589        n = cse + 1;
 590    if (sloc < n-1) {
 591        sum = net_checksum_add(n-css, data+css);
 592        stw_be_p(data + sloc, net_checksum_finish(sum));
 593    }
 594}
 595
 596static inline void
 597inc_reg_if_not_full(E1000State *s, int index)
 598{
 599    if (s->mac_reg[index] != 0xffffffff) {
 600        s->mac_reg[index]++;
 601    }
 602}
 603
 604static inline void
 605inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
 606{
 607    if (!memcmp(arr, bcast, sizeof bcast)) {
 608        inc_reg_if_not_full(s, BPTC);
 609    } else if (arr[0] & 1) {
 610        inc_reg_if_not_full(s, MPTC);
 611    }
 612}
 613
 614static void
 615grow_8reg_if_not_full(E1000State *s, int index, int size)
 616{
 617    uint64_t sum = s->mac_reg[index] | (uint64_t)s->mac_reg[index+1] << 32;
 618
 619    if (sum + size < sum) {
 620        sum = ~0ULL;
 621    } else {
 622        sum += size;
 623    }
 624    s->mac_reg[index] = sum;
 625    s->mac_reg[index+1] = sum >> 32;
 626}
 627
 628static void
 629increase_size_stats(E1000State *s, const int *size_regs, int size)
 630{
 631    if (size > 1023) {
 632        inc_reg_if_not_full(s, size_regs[5]);
 633    } else if (size > 511) {
 634        inc_reg_if_not_full(s, size_regs[4]);
 635    } else if (size > 255) {
 636        inc_reg_if_not_full(s, size_regs[3]);
 637    } else if (size > 127) {
 638        inc_reg_if_not_full(s, size_regs[2]);
 639    } else if (size > 64) {
 640        inc_reg_if_not_full(s, size_regs[1]);
 641    } else if (size == 64) {
 642        inc_reg_if_not_full(s, size_regs[0]);
 643    }
 644}
 645
 646static inline int
 647vlan_enabled(E1000State *s)
 648{
 649    return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
 650}
 651
 652static inline int
 653vlan_rx_filter_enabled(E1000State *s)
 654{
 655    return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
 656}
 657
 658static inline int
 659is_vlan_packet(E1000State *s, const uint8_t *buf)
 660{
 661    return (be16_to_cpup((uint16_t *)(buf + 12)) ==
 662                le16_to_cpu(s->mac_reg[VET]));
 663}
 664
 665static inline int
 666is_vlan_txd(uint32_t txd_lower)
 667{
 668    return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
 669}
 670
  671/* FCS aka Ethernet CRC-32. We don't get it from backends and can't
  672 * fill it in; just pad the descriptor length by 4 bytes unless the guest
  673 * told us to strip it off the packet. */
 674static inline int
 675fcs_len(E1000State *s)
 676{
 677    return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
 678}
 679
 680static void
 681e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
 682{
 683    static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
 684                                    PTC1023, PTC1522 };
 685
 686    NetClientState *nc = qemu_get_queue(s->nic);
 687    if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
 688        nc->info->receive(nc, buf, size);
 689    } else {
 690        qemu_send_packet(nc, buf, size);
 691    }
 692    inc_tx_bcast_or_mcast_count(s, buf);
 693    increase_size_stats(s, PTCregs, size);
 694}
 695
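/*
 * xmit_seg() sends one segment of a TSO transmission (or a whole non-TSO
 * packet). For TSO it patches the per-segment headers using the fields from
 * the context descriptor: the IPv4 total length (or IPv6 payload length),
 * the IPv4 identification field (incremented per segment), the TCP sequence
 * number (advanced by tso_frames * mss), the PSH/FIN flags (cleared on all
 * but the last segment) and the pseudo-header checksum (the segment length
 * is added in). As an illustration only: with mss = 1460 the third segment
 * has its sequence number advanced by 2 * 1460 = 2920 bytes.
 */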
 696static void
 697xmit_seg(E1000State *s)
 698{
 699    uint16_t len, *sp;
 700    unsigned int frames = s->tx.tso_frames, css, sofar;
 701    struct e1000_tx *tp = &s->tx;
 702
 703    if (tp->tse && tp->cptse) {
 704        css = tp->ipcss;
 705        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
 706               frames, tp->size, css);
 707        if (tp->ip) {    /* IPv4 */
 708            stw_be_p(tp->data+css+2, tp->size - css);
 709            stw_be_p(tp->data+css+4,
 710                     be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
 711        } else {         /* IPv6 */
 712            stw_be_p(tp->data+css+4, tp->size - css);
 713        }
 714        css = tp->tucss;
 715        len = tp->size - css;
 716        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
 717        if (tp->tcp) {
 718            sofar = frames * tp->mss;
 719            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
 720            if (tp->paylen - sofar > tp->mss) {
 721                tp->data[css + 13] &= ~9;    /* PSH, FIN */
 722            } else if (frames) {
 723                inc_reg_if_not_full(s, TSCTC);
 724            }
 725        } else    /* UDP */
 726            stw_be_p(tp->data+css+4, len);
 727        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
 728            unsigned int phsum;
 729            // add pseudo-header length before checksum calculation
 730            sp = (uint16_t *)(tp->data + tp->tucso);
 731            phsum = be16_to_cpup(sp) + len;
 732            phsum = (phsum >> 16) + (phsum & 0xffff);
 733            stw_be_p(sp, phsum);
 734        }
 735        tp->tso_frames++;
 736    }
 737
 738    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
 739        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
 740    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
 741        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
 742    if (tp->vlan_needed) {
 743        memmove(tp->vlan, tp->data, 4);
 744        memmove(tp->data, tp->data + 4, 8);
 745        memcpy(tp->data + 8, tp->vlan_header, 4);
 746        e1000_send_packet(s, tp->vlan, tp->size + 4);
 747    } else {
 748        e1000_send_packet(s, tp->data, tp->size);
 749    }
 750
 751    inc_reg_if_not_full(s, TPT);
 752    grow_8reg_if_not_full(s, TOTL, s->tx.size);
 753    s->mac_reg[GPTC] = s->mac_reg[TPT];
 754    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
 755    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
 756}
 757
 758static void
 759process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
 760{
 761    PCIDevice *d = PCI_DEVICE(s);
 762    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
 763    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
 764    unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
 765    unsigned int msh = 0xfffff;
 766    uint64_t addr;
 767    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
 768    struct e1000_tx *tp = &s->tx;
 769
 770    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
 771    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
 772        op = le32_to_cpu(xp->cmd_and_length);
 773        tp->ipcss = xp->lower_setup.ip_fields.ipcss;
 774        tp->ipcso = xp->lower_setup.ip_fields.ipcso;
 775        tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
 776        tp->tucss = xp->upper_setup.tcp_fields.tucss;
 777        tp->tucso = xp->upper_setup.tcp_fields.tucso;
 778        tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
 779        tp->paylen = op & 0xfffff;
 780        tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
 781        tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
 782        tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
 783        tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
 784        tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
 785        tp->tso_frames = 0;
 786        if (tp->tucso == 0) {    /* this is probably wrong */
 787            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
 788            tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
 789        }
 790        return;
 791    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
 792        // data descriptor
 793        if (tp->size == 0) {
 794            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
 795        }
 796        tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
 797    } else {
 798        // legacy descriptor
 799        tp->cptse = 0;
 800    }
 801
 802    if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
 803        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
 804        tp->vlan_needed = 1;
 805        stw_be_p(tp->vlan_header,
 806                      le16_to_cpu(s->mac_reg[VET]));
 807        stw_be_p(tp->vlan_header + 2,
 808                      le16_to_cpu(dp->upper.fields.special));
 809    }
 810
 811    addr = le64_to_cpu(dp->buffer_addr);
 812    if (tp->tse && tp->cptse) {
 813        msh = tp->hdr_len + tp->mss;
 814        do {
 815            bytes = split_size;
 816            if (tp->size + bytes > msh)
 817                bytes = msh - tp->size;
 818
 819            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
 820            pci_dma_read(d, addr, tp->data + tp->size, bytes);
 821            sz = tp->size + bytes;
 822            if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
 823                memmove(tp->header, tp->data, tp->hdr_len);
 824            }
 825            tp->size = sz;
 826            addr += bytes;
 827            if (sz == msh) {
 828                xmit_seg(s);
 829                memmove(tp->data, tp->header, tp->hdr_len);
 830                tp->size = tp->hdr_len;
 831            }
 832            split_size -= bytes;
 833        } while (bytes && split_size);
 834    } else if (!tp->tse && tp->cptse) {
 835        // context descriptor TSE is not set, while data descriptor TSE is set
 836        DBGOUT(TXERR, "TCP segmentation error\n");
 837    } else {
 838        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
 839        pci_dma_read(d, addr, tp->data + tp->size, split_size);
 840        tp->size += split_size;
 841    }
 842
 843    if (!(txd_lower & E1000_TXD_CMD_EOP))
 844        return;
 845    if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
 846        xmit_seg(s);
 847    }
 848    tp->tso_frames = 0;
 849    tp->sum_needed = 0;
 850    tp->vlan_needed = 0;
 851    tp->size = 0;
 852    tp->cptse = 0;
 853}
 854
 855static uint32_t
 856txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
 857{
 858    PCIDevice *d = PCI_DEVICE(s);
 859    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
 860
 861    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
 862        return 0;
 863    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
 864                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
 865    dp->upper.data = cpu_to_le32(txd_upper);
 866    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
 867                  &dp->upper, sizeof(dp->upper));
 868    return E1000_ICR_TXDW;
 869}
 870
 871static uint64_t tx_desc_base(E1000State *s)
 872{
 873    uint64_t bah = s->mac_reg[TDBAH];
 874    uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
 875
 876    return (bah << 32) + bal;
 877}
 878
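/*
 * start_xmit() walks the transmit ring from TDH to TDT; the ring is
 * considered empty when TDH == TDT. Each descriptor is
 * sizeof(struct e1000_tx_desc) == 16 bytes and TDLEN is in bytes, so for
 * example a 4096-byte ring holds 256 descriptors; TDH wraps back to zero
 * once TDH * 16 reaches TDLEN.
 */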
 879static void
 880start_xmit(E1000State *s)
 881{
 882    PCIDevice *d = PCI_DEVICE(s);
 883    dma_addr_t base;
 884    struct e1000_tx_desc desc;
 885    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
 886
 887    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
 888        DBGOUT(TX, "tx disabled\n");
 889        return;
 890    }
 891
 892    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
 893        base = tx_desc_base(s) +
 894               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
 895        pci_dma_read(d, base, &desc, sizeof(desc));
 896
 897        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
 898               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
 899               desc.upper.data);
 900
 901        process_tx_desc(s, &desc);
 902        cause |= txdesc_writeback(s, base, &desc);
 903
 904        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
 905            s->mac_reg[TDH] = 0;
 906        /*
  907         * The following could happen only if the guest SW assigns
  908         * bogus values to TDT/TDLEN.
  909         * There's nothing too intelligent we could do about this.
 910         */
 911        if (s->mac_reg[TDH] == tdh_start) {
 912            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
 913                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
 914            break;
 915        }
 916    }
 917    set_ics(s, 0, cause);
 918}
 919
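/*
 * receive_filter() applies the RX filters in the order visible below:
 * 802.1Q packets are checked against the VFTA bitmap when VLAN filtering is
 * enabled, then the UPE/MPE promiscuous bits and BAM broadcast accept, then
 * exact matches against the Receive Address (RA) registers, and finally the
 * Multicast Table Array (MTA) hash. The hash takes 12 bits of the
 * destination address selected by RCTL.MO; for instance, with MO == 0 an
 * address ending in ...:ab:cd yields f = (0xcdab >> 4) & 0xfff = 0xcda,
 * i.e. bit 26 of MTA[0x66].
 */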
 920static int
 921receive_filter(E1000State *s, const uint8_t *buf, int size)
 922{
 923    static const int mta_shift[] = {4, 3, 2, 0};
 924    uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
 925    int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
 926
 927    if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
 928        uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
 929        uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
 930                                     ((vid >> 5) & 0x7f));
 931        if ((vfta & (1 << (vid & 0x1f))) == 0)
 932            return 0;
 933    }
 934
 935    if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
 936        return 1;
 937    }
 938
 939    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
 940        inc_reg_if_not_full(s, MPRC);
 941        return 1;
 942    }
 943
 944    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
 945        inc_reg_if_not_full(s, BPRC);
 946        return 1;
 947    }
 948
 949    for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
 950        if (!(rp[1] & E1000_RAH_AV))
 951            continue;
 952        ra[0] = cpu_to_le32(rp[0]);
 953        ra[1] = cpu_to_le32(rp[1]);
 954        if (!memcmp(buf, (uint8_t *)ra, 6)) {
 955            DBGOUT(RXFILTER,
 956                   "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
 957                   (int)(rp - s->mac_reg - RA)/2,
 958                   buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
 959            return 1;
 960        }
 961    }
 962    DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
 963           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
 964
 965    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
 966    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
 967    if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f))) {
 968        inc_reg_if_not_full(s, MPRC);
 969        return 1;
 970    }
 971    DBGOUT(RXFILTER,
 972           "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
 973           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
 974           (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
 975           s->mac_reg[MTA + (f >> 5)]);
 976
 977    return 0;
 978}
 979
 980static void
 981e1000_set_link_status(NetClientState *nc)
 982{
 983    E1000State *s = qemu_get_nic_opaque(nc);
 984    uint32_t old_status = s->mac_reg[STATUS];
 985
 986    if (nc->link_down) {
 987        e1000_link_down(s);
 988    } else {
 989        if (have_autoneg(s) &&
 990            !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
 991            /* emulate auto-negotiation if supported */
 992            timer_mod(s->autoneg_timer,
 993                      qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
 994        } else {
 995            e1000_link_up(s);
 996        }
 997    }
 998
 999    if (s->mac_reg[STATUS] != old_status)
1000        set_ics(s, 0, E1000_ICR_LSC);
1001}
1002
1003static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
1004{
1005    int bufs;
1006    /* Fast-path short packets */
1007    if (total_size <= s->rxbuf_size) {
1008        return s->mac_reg[RDH] != s->mac_reg[RDT];
1009    }
1010    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
1011        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
1012    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
1013        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
1014            s->mac_reg[RDT] - s->mac_reg[RDH];
1015    } else {
1016        return false;
1017    }
1018    return total_size <= bufs * s->rxbuf_size;
1019}
1020
1021static int
1022e1000_can_receive(NetClientState *nc)
1023{
1024    E1000State *s = qemu_get_nic_opaque(nc);
1025
1026    return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
1027        (s->mac_reg[RCTL] & E1000_RCTL_EN) &&
1028        (s->parent_obj.config[PCI_COMMAND] & PCI_COMMAND_MASTER) &&
1029        e1000_has_rxbufs(s, 1);
1030}
1031
1032static uint64_t rx_desc_base(E1000State *s)
1033{
1034    uint64_t bah = s->mac_reg[RDBAH];
1035    uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
1036
1037    return (bah << 32) + bal;
1038}
1039
1040static ssize_t
1041e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
1042{
1043    E1000State *s = qemu_get_nic_opaque(nc);
1044    PCIDevice *d = PCI_DEVICE(s);
1045    struct e1000_rx_desc desc;
1046    dma_addr_t base;
1047    unsigned int n, rdt;
1048    uint32_t rdh_start;
1049    uint16_t vlan_special = 0;
1050    uint8_t vlan_status = 0;
1051    uint8_t min_buf[MIN_BUF_SIZE];
1052    struct iovec min_iov;
1053    uint8_t *filter_buf = iov->iov_base;
1054    size_t size = iov_size(iov, iovcnt);
1055    size_t iov_ofs = 0;
1056    size_t desc_offset;
1057    size_t desc_size;
1058    size_t total_size;
1059    static const int PRCregs[6] = { PRC64, PRC127, PRC255, PRC511,
1060                                    PRC1023, PRC1522 };
1061
1062    if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
1063        return -1;
1064    }
1065
1066    if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
1067        return -1;
1068    }
1069
1070    /* Pad to minimum Ethernet frame length */
1071    if (size < sizeof(min_buf)) {
1072        iov_to_buf(iov, iovcnt, 0, min_buf, size);
1073        memset(&min_buf[size], 0, sizeof(min_buf) - size);
1074        inc_reg_if_not_full(s, RUC);
1075        min_iov.iov_base = filter_buf = min_buf;
1076        min_iov.iov_len = size = sizeof(min_buf);
1077        iovcnt = 1;
1078        iov = &min_iov;
1079    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
1080        /* This is very unlikely, but may happen. */
1081        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
1082        filter_buf = min_buf;
1083    }
1084
1085    /* Discard oversized packets if !LPE and !SBP. */
1086    if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
1087        (size > MAXIMUM_ETHERNET_VLAN_SIZE
1088        && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
1089        && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
1090        inc_reg_if_not_full(s, ROC);
1091        return size;
1092    }
1093
1094    if (!receive_filter(s, filter_buf, size)) {
1095        return size;
1096    }
1097
1098    if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
1099        vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
1100                                                                + 14)));
1101        iov_ofs = 4;
1102        if (filter_buf == iov->iov_base) {
1103            memmove(filter_buf + 4, filter_buf, 12);
1104        } else {
1105            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
1106            while (iov->iov_len <= iov_ofs) {
1107                iov_ofs -= iov->iov_len;
1108                iov++;
1109            }
1110        }
1111        vlan_status = E1000_RXD_STAT_VP;
1112        size -= 4;
1113    }
1114
1115    rdh_start = s->mac_reg[RDH];
1116    desc_offset = 0;
1117    total_size = size + fcs_len(s);
1118    if (!e1000_has_rxbufs(s, total_size)) {
1119            set_ics(s, 0, E1000_ICS_RXO);
1120            return -1;
1121    }
1122    do {
1123        desc_size = total_size - desc_offset;
1124        if (desc_size > s->rxbuf_size) {
1125            desc_size = s->rxbuf_size;
1126        }
1127        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
1128        pci_dma_read(d, base, &desc, sizeof(desc));
1129        desc.special = vlan_special;
1130        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
1131        if (desc.buffer_addr) {
1132            if (desc_offset < size) {
1133                size_t iov_copy;
1134                hwaddr ba = le64_to_cpu(desc.buffer_addr);
1135                size_t copy_size = size - desc_offset;
1136                if (copy_size > s->rxbuf_size) {
1137                    copy_size = s->rxbuf_size;
1138                }
1139                do {
1140                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
1141                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
1142                    copy_size -= iov_copy;
1143                    ba += iov_copy;
1144                    iov_ofs += iov_copy;
1145                    if (iov_ofs == iov->iov_len) {
1146                        iov++;
1147                        iov_ofs = 0;
1148                    }
1149                } while (copy_size);
1150            }
1151            desc_offset += desc_size;
1152            desc.length = cpu_to_le16(desc_size);
1153            if (desc_offset >= total_size) {
1154                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1155            } else {
1156                /* Guest zeroing out status is not a hardware requirement.
1157                   Clear EOP in case guest didn't do it. */
1158                desc.status &= ~E1000_RXD_STAT_EOP;
1159            }
1160        } else { // as per intel docs; skip descriptors with null buf addr
1161            DBGOUT(RX, "Null RX descriptor!!\n");
1162        }
1163        pci_dma_write(d, base, &desc, sizeof(desc));
1164
1165        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1166            s->mac_reg[RDH] = 0;
1167        /* see comment in start_xmit; same here */
1168        if (s->mac_reg[RDH] == rdh_start) {
1169            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1170                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1171            set_ics(s, 0, E1000_ICS_RXO);
1172            return -1;
1173        }
1174    } while (desc_offset < total_size);
1175
1176    increase_size_stats(s, PRCregs, total_size);
1177    inc_reg_if_not_full(s, TPR);
1178    s->mac_reg[GPRC] = s->mac_reg[TPR];
1179    /* TOR - Total Octets Received:
1180     * This register includes bytes received in a packet from the <Destination
1181     * Address> field through the <CRC> field, inclusively.
1182     * Always include FCS length (4) in size.
1183     */
1184    grow_8reg_if_not_full(s, TORL, size+4);
1185    s->mac_reg[GORCL] = s->mac_reg[TORL];
1186    s->mac_reg[GORCH] = s->mac_reg[TORH];
1187
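    /*
     * Raise the RXDMT0 (descriptors low) cause when the unused descriptor
     * space left in the RX ring falls to RDLEN >> rxbuf_min_shift or below;
     * with the reset default rxbuf_min_shift of 1 (RCTL.RDMTS == 0) that is
     * half the ring.
     */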
1188    n = E1000_ICS_RXT0;
1189    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1190        rdt += s->mac_reg[RDLEN] / sizeof(desc);
1191    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1192        s->rxbuf_min_shift)
1193        n |= E1000_ICS_RXDMT0;
1194
1195    set_ics(s, 0, n);
1196
1197    return size;
1198}
1199
1200static ssize_t
1201e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1202{
1203    const struct iovec iov = {
1204        .iov_base = (uint8_t *)buf,
1205        .iov_len = size
1206    };
1207
1208    return e1000_receive_iov(nc, &iov, 1);
1209}
1210
1211static uint32_t
1212mac_readreg(E1000State *s, int index)
1213{
1214    return s->mac_reg[index];
1215}
1216
1217static uint32_t
1218mac_low4_read(E1000State *s, int index)
1219{
1220    return s->mac_reg[index] & 0xf;
1221}
1222
1223static uint32_t
1224mac_low11_read(E1000State *s, int index)
1225{
1226    return s->mac_reg[index] & 0x7ff;
1227}
1228
1229static uint32_t
1230mac_low13_read(E1000State *s, int index)
1231{
1232    return s->mac_reg[index] & 0x1fff;
1233}
1234
1235static uint32_t
1236mac_low16_read(E1000State *s, int index)
1237{
1238    return s->mac_reg[index] & 0xffff;
1239}
1240
1241static uint32_t
1242mac_icr_read(E1000State *s, int index)
1243{
1244    uint32_t ret = s->mac_reg[ICR];
1245
1246    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1247    set_interrupt_cause(s, 0, 0);
1248    return ret;
1249}
1250
1251static uint32_t
1252mac_read_clr4(E1000State *s, int index)
1253{
1254    uint32_t ret = s->mac_reg[index];
1255
1256    s->mac_reg[index] = 0;
1257    return ret;
1258}
1259
1260static uint32_t
1261mac_read_clr8(E1000State *s, int index)
1262{
1263    uint32_t ret = s->mac_reg[index];
1264
1265    s->mac_reg[index] = 0;
1266    s->mac_reg[index-1] = 0;
1267    return ret;
1268}
1269
1270static void
1271mac_writereg(E1000State *s, int index, uint32_t val)
1272{
1273    uint32_t macaddr[2];
1274
1275    s->mac_reg[index] = val;
1276
1277    if (index == RA + 1) {
1278        macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1279        macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1280        qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1281    }
1282}
1283
1284static void
1285set_rdt(E1000State *s, int index, uint32_t val)
1286{
1287    s->mac_reg[index] = val & 0xffff;
1288    if (e1000_has_rxbufs(s, 1)) {
1289        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1290    }
1291}
1292
1293static void
1294set_16bit(E1000State *s, int index, uint32_t val)
1295{
1296    s->mac_reg[index] = val & 0xffff;
1297}
1298
1299static void
1300set_dlen(E1000State *s, int index, uint32_t val)
1301{
1302    s->mac_reg[index] = val & 0xfff80;
1303}
1304
1305static void
1306set_tctl(E1000State *s, int index, uint32_t val)
1307{
1308    s->mac_reg[index] = val;
1309    s->mac_reg[TDT] &= 0xffff;
1310    start_xmit(s);
1311}
1312
1313static void
1314set_icr(E1000State *s, int index, uint32_t val)
1315{
1316    DBGOUT(INTERRUPT, "set_icr %x\n", val);
1317    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1318}
1319
1320static void
1321set_imc(E1000State *s, int index, uint32_t val)
1322{
1323    s->mac_reg[IMS] &= ~val;
1324    set_ics(s, 0, 0);
1325}
1326
1327static void
1328set_ims(E1000State *s, int index, uint32_t val)
1329{
1330    s->mac_reg[IMS] |= val;
1331    set_ics(s, 0, 0);
1332}
1333
1334#define getreg(x)    [x] = mac_readreg
1335static uint32_t (*macreg_readops[])(E1000State *, int) = {
1336    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1337    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1338    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1339    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1340    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1341    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1342    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1343    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1344    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1345    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1346    getreg(TNCRS),    getreg(SEC),      getreg(CEXTERR),  getreg(RLEC),
1347    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1348    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1349    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
1350    getreg(GOTCL),
1351
1352    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1353    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
1354    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
1355    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
1356    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
1357    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
1358    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
1359    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
1360    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1361    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1362    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
1363    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
1364    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
1365    [MPTC]    = mac_read_clr4,
1366    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1367    [EERD]    = flash_eerd_read,
1368    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
1369    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
1370    [RDFPC]   = mac_low13_read,
1371    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
1372    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
1373    [TDFPC]   = mac_low13_read,
1374    [AIT]     = mac_low16_read,
1375
1376    [CRCERRS ... MPC]   = &mac_readreg,
1377    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
1378    [FFLT ... FFLT+6]   = &mac_low11_read,
1379    [RA ... RA+31]      = &mac_readreg,
1380    [WUPM ... WUPM+31]  = &mac_readreg,
1381    [MTA ... MTA+127]   = &mac_readreg,
1382    [VFTA ... VFTA+127] = &mac_readreg,
1383    [FFMT ... FFMT+254] = &mac_low4_read,
1384    [FFVT ... FFVT+254] = &mac_readreg,
1385    [PBM ... PBM+16383] = &mac_readreg,
1386};
1387enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1388
1389#define putreg(x)    [x] = mac_writereg
1390static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1391    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1392    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1393    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1394    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
1395    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
1396    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
1397    putreg(WUS),      putreg(AIT),
1398
1399    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
1400    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
1401    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
1402    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
1403    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
1404    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
1405    [ITR]    = set_16bit,
1406
1407    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
1408    [FFLT ... FFLT+6]   = &mac_writereg,
1409    [RA ... RA+31]      = &mac_writereg,
1410    [WUPM ... WUPM+31]  = &mac_writereg,
1411    [MTA ... MTA+127]   = &mac_writereg,
1412    [VFTA ... VFTA+127] = &mac_writereg,
1413    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
1414    [PBM ... PBM+16383] = &mac_writereg,
1415};
1416
1417enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1418
1419enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1420
1421#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
1422/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1423 * f - flag bits (up to 6 possible flags)
1424 * n - flag needed
  1425 * p - partially implemented */
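/* For example, markflag(MIT) == (E1000_FLAG_MIT << 2) | MAC_ACCESS_FLAG_NEEDED
 * == 0x0a; e1000_mmio_read()/e1000_mmio_write() recover the flag bits with
 * (mac_reg_access[index] >> 2) and test them against s->compat_flags. */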
1426static const uint8_t mac_reg_access[0x8000] = {
1427    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
1428    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),
1429
1430    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
1431    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
1432    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
1433    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
1434    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
1435    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
1436    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
1437    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
1438    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
1439    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
1440    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
1441    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
1442    [SEC]     = markflag(MAC),    [CEXTERR] = markflag(MAC),
1443    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
1444    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
1445    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
1446    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
1447    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
1448    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
1449    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
1450    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
1451    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
1452    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
1453    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
1454    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
1455    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
1456    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
1457    [BPTC]    = markflag(MAC),
1458
1459    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1460    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1461    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1462    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1463    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1464    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1465    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1466    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1467    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1468    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1469    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
1470};
1471
1472static void
1473e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1474                 unsigned size)
1475{
1476    E1000State *s = opaque;
1477    unsigned int index = (addr & 0x1ffff) >> 2;
1478
1479    if (index < NWRITEOPS && macreg_writeops[index]) {
1480        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1481            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1482            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1483                DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1484                       "It is not fully implemented.\n", index<<2);
1485            }
1486            macreg_writeops[index](s, index, val);
1487        } else {    /* "flag needed" bit is set, but the flag is not active */
1488            DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1489                   index<<2);
1490        }
1491    } else if (index < NREADOPS && macreg_readops[index]) {
1492        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1493               index<<2, val);
1494    } else {
1495        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1496               index<<2, val);
1497    }
1498}
1499
1500static uint64_t
1501e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1502{
1503    E1000State *s = opaque;
1504    unsigned int index = (addr & 0x1ffff) >> 2;
1505
1506    if (index < NREADOPS && macreg_readops[index]) {
1507        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1508            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1509            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1510                DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1511                       "It is not fully implemented.\n", index<<2);
1512            }
1513            return macreg_readops[index](s, index);
1514        } else {    /* "flag needed" bit is set, but the flag is not active */
1515            DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1516                   index<<2);
1517        }
1518    } else {
1519        DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1520    }
1521    return 0;
1522}
1523
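/*
 * The .impl sizes below tell the memory core to split or widen guest
 * accesses so the callbacks only ever see aligned 4-byte operations,
 * matching the 32-bit register layout assumed by e1000_mmio_read() and
 * e1000_mmio_write().
 */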
1524static const MemoryRegionOps e1000_mmio_ops = {
1525    .read = e1000_mmio_read,
1526    .write = e1000_mmio_write,
1527    .endianness = DEVICE_LITTLE_ENDIAN,
1528    .impl = {
1529        .min_access_size = 4,
1530        .max_access_size = 4,
1531    },
1532};
1533
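/*
 * The I/O-space BAR (BAR 1, see pci_e1000_realize()) is only stubbed out.
 * On real 8254x parts it provides an IOADDR/IODATA window into the same
 * register file (per the software developer's manual); guests normally use
 * the MMIO BAR instead, so reads return 0 and writes are ignored here.
 * The (void)s statements merely silence unused-variable warnings.
 */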
1534static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1535                              unsigned size)
1536{
1537    E1000State *s = opaque;
1538
1539    (void)s;
1540    return 0;
1541}
1542
1543static void e1000_io_write(void *opaque, hwaddr addr,
1544                           uint64_t val, unsigned size)
1545{
1546    E1000State *s = opaque;
1547
1548    (void)s;
1549}
1550
1551static const MemoryRegionOps e1000_io_ops = {
1552    .read = e1000_io_read,
1553    .write = e1000_io_write,
1554    .endianness = DEVICE_LITTLE_ENDIAN,
1555};
1556
1557static bool is_version_1(void *opaque, int version_id)
1558{
1559    return version_id == 1;
1560}
1561
1562static void e1000_pre_save(void *opaque)
1563{
1564    E1000State *s = opaque;
1565    NetClientState *nc = qemu_get_queue(s->nic);
1566
1567    /* If the mitigation timer is active, emulate a timeout now. */
1568    if (s->mit_timer_on) {
1569        e1000_mit_timer(s);
1570    }
1571
1572    /*
1573     * If link is down and auto-negotiation is supported and ongoing,
1574     * complete auto-negotiation immediately. This allows us to look
1575     * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1576     */
1577    if (nc->link_down && have_autoneg(s)) {
1578        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1579    }
1580}
1581
1582static int e1000_post_load(void *opaque, int version_id)
1583{
1584    E1000State *s = opaque;
1585    NetClientState *nc = qemu_get_queue(s->nic);
1586
1587    if (!chkflag(MIT)) {
1588        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1589            s->mac_reg[TADV] = 0;
1590        s->mit_irq_level = false;
1591    }
1592    s->mit_ide = 0;
1593    s->mit_timer_on = false;
1594
1595    /* nc.link_down can't be migrated, so infer link_down from the link
1596     * status bit in mac_reg[STATUS].
1597     * Alternatively, restart link negotiation if it was in progress. */
1598    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1599
1600    if (have_autoneg(s) &&
1601        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1602        nc->link_down = false;
1603        timer_mod(s->autoneg_timer,
1604                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1605    }
1606
1607    return 0;
1608}
1609
1610static bool e1000_mit_state_needed(void *opaque)
1611{
1612    E1000State *s = opaque;
1613
1614    return chkflag(MIT);
1615}
1616
1617static bool e1000_full_mac_needed(void *opaque)
1618{
1619    E1000State *s = opaque;
1620
1621    return chkflag(MAC);
1622}
1623
1624static const VMStateDescription vmstate_e1000_mit_state = {
1625    .name = "e1000/mit_state",
1626    .version_id = 1,
1627    .minimum_version_id = 1,
1628    .needed = e1000_mit_state_needed,
1629    .fields = (VMStateField[]) {
1630        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1631        VMSTATE_UINT32(mac_reg[RADV], E1000State),
1632        VMSTATE_UINT32(mac_reg[TADV], E1000State),
1633        VMSTATE_UINT32(mac_reg[ITR], E1000State),
1634        VMSTATE_BOOL(mit_irq_level, E1000State),
1635        VMSTATE_END_OF_LIST()
1636    }
1637};
1638
1639static const VMStateDescription vmstate_e1000_full_mac_state = {
1640    .name = "e1000/full_mac_state",
1641    .version_id = 1,
1642    .minimum_version_id = 1,
1643    .needed = e1000_full_mac_needed,
1644    .fields = (VMStateField[]) {
1645        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
1646        VMSTATE_END_OF_LIST()
1647    }
1648};
1649
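/*
 * Main migration descriptor.  The two subsections above are attached at the
 * end of this structure and are only put on the wire when their .needed
 * callbacks return true, so streams produced with the corresponding compat
 * flags disabled remain loadable by older QEMU versions.
 */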
1650static const VMStateDescription vmstate_e1000 = {
1651    .name = "e1000",
1652    .version_id = 2,
1653    .minimum_version_id = 1,
1654    .pre_save = e1000_pre_save,
1655    .post_load = e1000_post_load,
1656    .fields = (VMStateField[]) {
1657        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1658        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1659        VMSTATE_UNUSED(4), /* Was mmio_base.  */
1660        VMSTATE_UINT32(rxbuf_size, E1000State),
1661        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1662        VMSTATE_UINT32(eecd_state.val_in, E1000State),
1663        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1664        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1665        VMSTATE_UINT16(eecd_state.reading, E1000State),
1666        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1667        VMSTATE_UINT8(tx.ipcss, E1000State),
1668        VMSTATE_UINT8(tx.ipcso, E1000State),
1669        VMSTATE_UINT16(tx.ipcse, E1000State),
1670        VMSTATE_UINT8(tx.tucss, E1000State),
1671        VMSTATE_UINT8(tx.tucso, E1000State),
1672        VMSTATE_UINT16(tx.tucse, E1000State),
1673        VMSTATE_UINT32(tx.paylen, E1000State),
1674        VMSTATE_UINT8(tx.hdr_len, E1000State),
1675        VMSTATE_UINT16(tx.mss, E1000State),
1676        VMSTATE_UINT16(tx.size, E1000State),
1677        VMSTATE_UINT16(tx.tso_frames, E1000State),
1678        VMSTATE_UINT8(tx.sum_needed, E1000State),
1679        VMSTATE_INT8(tx.ip, E1000State),
1680        VMSTATE_INT8(tx.tcp, E1000State),
1681        VMSTATE_BUFFER(tx.header, E1000State),
1682        VMSTATE_BUFFER(tx.data, E1000State),
1683        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1684        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1685        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1686        VMSTATE_UINT32(mac_reg[EECD], E1000State),
1687        VMSTATE_UINT32(mac_reg[EERD], E1000State),
1688        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1689        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1690        VMSTATE_UINT32(mac_reg[ICR], E1000State),
1691        VMSTATE_UINT32(mac_reg[ICS], E1000State),
1692        VMSTATE_UINT32(mac_reg[IMC], E1000State),
1693        VMSTATE_UINT32(mac_reg[IMS], E1000State),
1694        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1695        VMSTATE_UINT32(mac_reg[MANC], E1000State),
1696        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1697        VMSTATE_UINT32(mac_reg[MPC], E1000State),
1698        VMSTATE_UINT32(mac_reg[PBA], E1000State),
1699        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1700        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1701        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1702        VMSTATE_UINT32(mac_reg[RDH], E1000State),
1703        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1704        VMSTATE_UINT32(mac_reg[RDT], E1000State),
1705        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1706        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1707        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1708        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1709        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1710        VMSTATE_UINT32(mac_reg[TDH], E1000State),
1711        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1712        VMSTATE_UINT32(mac_reg[TDT], E1000State),
1713        VMSTATE_UINT32(mac_reg[TORH], E1000State),
1714        VMSTATE_UINT32(mac_reg[TORL], E1000State),
1715        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1716        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1717        VMSTATE_UINT32(mac_reg[TPR], E1000State),
1718        VMSTATE_UINT32(mac_reg[TPT], E1000State),
1719        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1720        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1721        VMSTATE_UINT32(mac_reg[VET], E1000State),
1722        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1723        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1724        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1725        VMSTATE_END_OF_LIST()
1726    },
1727    .subsections = (const VMStateDescription*[]) {
1728        &vmstate_e1000_mit_state,
1729        &vmstate_e1000_full_mac_state,
1730        NULL
1731    }
1732};
1733
1734/*
1735 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1736 * Note: A valid DevId will be inserted during pci_e1000_realize().
1737 */
1738static const uint16_t e1000_eeprom_template[64] = {
1739    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1740    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1741    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1742    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1743    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1744    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1745    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1746    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1747};
1748
1749/* PCI interface */
1750
1751static void
1752e1000_mmio_setup(E1000State *d)
1753{
1754    int i;
1755    const uint32_t excluded_regs[] = {
1756        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1757        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1758    };
1759
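    /*
     * Coalesce MMIO for everything except the registers listed above
     * (MDIC, the interrupt cause/set/mask registers, transmit control and
     * the TX tail): writes to those have immediate side effects and so must
     * not be batched by the accelerator.  PNPMMIO_SIZE terminates the list.
     */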
1760    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1761                          "e1000-mmio", PNPMMIO_SIZE);
1762    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1763    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1764        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1765                                     excluded_regs[i+1] - excluded_regs[i] - 4);
1766    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1767}
1768
1769static void
1770pci_e1000_uninit(PCIDevice *dev)
1771{
1772    E1000State *d = E1000(dev);
1773
1774    timer_del(d->autoneg_timer);
1775    timer_free(d->autoneg_timer);
1776    timer_del(d->mit_timer);
1777    timer_free(d->mit_timer);
1778    qemu_del_nic(d->nic);
1779}
1780
1781static NetClientInfo net_e1000_info = {
1782    .type = NET_CLIENT_OPTIONS_KIND_NIC,
1783    .size = sizeof(NICState),
1784    .can_receive = e1000_can_receive,
1785    .receive = e1000_receive,
1786    .receive_iov = e1000_receive_iov,
1787    .link_status_changed = e1000_set_link_status,
1788};
1789
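/*
 * Config-space writes are mostly handled by the PCI core; the only extra
 * work is to flush packets that were queued while bus mastering was off,
 * since enabling PCI_COMMAND_MASTER makes DMA into the RX ring possible
 * again.
 */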
1790static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1791                                uint32_t val, int len)
1792{
1793    E1000State *s = E1000(pci_dev);
1794
1795    pci_default_write_config(pci_dev, address, val, len);
1796
1797    if (range_covers_byte(address, len, PCI_COMMAND) &&
1798        (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1799        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1800    }
1801}
1802
1803
1804static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1805{
1806    DeviceState *dev = DEVICE(pci_dev);
1807    E1000State *d = E1000(pci_dev);
1808    PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev);
1809    uint8_t *pci_conf;
1810    uint16_t checksum = 0;
1811    int i;
1812    uint8_t *macaddr;
1813
1814    pci_dev->config_write = e1000_write_config;
1815
1816    pci_conf = pci_dev->config;
1817
1818    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1819    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1820
1821    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1822
1823    e1000_mmio_setup(d);
1824
1825    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1826
1827    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1828
1829    memmove(d->eeprom_data, e1000_eeprom_template,
1830        sizeof e1000_eeprom_template);
1831    qemu_macaddr_default_if_unset(&d->conf.macaddr);
1832    macaddr = d->conf.macaddr.a;
1833    for (i = 0; i < 3; i++)
1834        d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1835    d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id;
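    /*
     * Word 0x3F (EEPROM_CHECKSUM_REG) is recomputed after patching in the
     * MAC address and device ID, so that all 64 words sum to EEPROM_SUM
     * (0xBABA per the 8254x manual), which is what drivers use to validate
     * the EEPROM contents.
     */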
1836    for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1837        checksum += d->eeprom_data[i];
1838    checksum = (uint16_t) EEPROM_SUM - checksum;
1839    d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1840
1841    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1842                          object_get_typename(OBJECT(d)), dev->id, d);
1843
1844    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1845
1846    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1847    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1848}
1849
1850static void qdev_e1000_reset(DeviceState *dev)
1851{
1852    E1000State *d = E1000(dev);
1853    e1000_reset(d);
1854}
1855
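/*
 * The three bit properties below live in compat_flags and default to on.
 * They are intended to be switched off (typically by machine-type compat
 * settings, e.g. extra_mac_registers=off) when migration compatibility with
 * older QEMU versions that lack the corresponding behaviour is required.
 */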
1856static Property e1000_properties[] = {
1857    DEFINE_NIC_PROPERTIES(E1000State, conf),
1858    DEFINE_PROP_BIT("autonegotiation", E1000State,
1859                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1860    DEFINE_PROP_BIT("mitigation", E1000State,
1861                    compat_flags, E1000_FLAG_MIT_BIT, true),
1862    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1863                    compat_flags, E1000_FLAG_MAC_BIT, true),
1864    DEFINE_PROP_END_OF_LIST(),
1865};
1866
1867typedef struct E1000Info {
1868    const char *name;
1869    uint16_t   device_id;
1870    uint8_t    revision;
1871    uint16_t   phy_id2;
1872} E1000Info;
1873
1874static void e1000_class_init(ObjectClass *klass, void *data)
1875{
1876    DeviceClass *dc = DEVICE_CLASS(klass);
1877    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1878    E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1879    const E1000Info *info = data;
1880
1881    k->realize = pci_e1000_realize;
1882    k->exit = pci_e1000_uninit;
1883    k->romfile = "efi-e1000.rom";
1884    k->vendor_id = PCI_VENDOR_ID_INTEL;
1885    k->device_id = info->device_id;
1886    k->revision = info->revision;
1887    e->phy_id2 = info->phy_id2;
1888    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1889    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1890    dc->desc = "Intel Gigabit Ethernet";
1891    dc->reset = qdev_e1000_reset;
1892    dc->vmsd = &vmstate_e1000;
1893    dc->props = e1000_properties;
1894}
1895
1896static void e1000_instance_init(Object *obj)
1897{
1898    E1000State *n = E1000(obj);
1899    device_add_bootindex_property(obj, &n->conf.bootindex,
1900                                  "bootindex", "/ethernet-phy@0",
1901                                  DEVICE(n), NULL);
1902}
1903
1904static const TypeInfo e1000_base_info = {
1905    .name          = TYPE_E1000_BASE,
1906    .parent        = TYPE_PCI_DEVICE,
1907    .instance_size = sizeof(E1000State),
1908    .instance_init = e1000_instance_init,
1909    .class_size    = sizeof(E1000BaseClass),
1910    .abstract      = true,
1911};
1912
1913static const E1000Info e1000_devices[] = {
1914    {
1915        .name      = "e1000",
1916        .device_id = E1000_DEV_ID_82540EM,
1917        .revision  = 0x03,
1918        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1919    },
1920    {
1921        .name      = "e1000-82544gc",
1922        .device_id = E1000_DEV_ID_82544GC_COPPER,
1923        .revision  = 0x03,
1924        .phy_id2   = E1000_PHY_ID2_82544x,
1925    },
1926    {
1927        .name      = "e1000-82545em",
1928        .device_id = E1000_DEV_ID_82545EM_COPPER,
1929        .revision  = 0x03,
1930        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1931    },
1932};
1933
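/*
 * Each entry in e1000_devices[] above becomes a concrete QOM type derived
 * from the abstract TYPE_E1000_BASE; the E1000Info is passed as class_data
 * so that e1000_class_init() can fill in the per-variant PCI device ID,
 * revision and PHY ID2.
 */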
1934static void e1000_register_types(void)
1935{
1936    int i;
1937
1938    type_register_static(&e1000_base_info);
1939    for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1940        const E1000Info *info = &e1000_devices[i];
1941        TypeInfo type_info = {};
1942
1943        type_info.name = info->name;
1944        type_info.parent = TYPE_E1000_BASE;
1945        type_info.class_data = (void *)info;
1946        type_info.class_init = e1000_class_init;
1947        type_info.instance_init = e1000_instance_init;
1948
1949        type_register(&type_info);
1950    }
1951}
1952
1953type_init(e1000_register_types)
1954