qemu/hw/net/e1000.c
   1/*
   2 * QEMU e1000 emulation
   3 *
   4 * Software developer's manual:
   5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
   6 *
   7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
   8 * Copyright (c) 2008 Qumranet
   9 * Based on work done by:
  10 * Copyright (c) 2007 Dan Aloni
  11 * Copyright (c) 2004 Antony T Curtis
  12 *
  13 * This library is free software; you can redistribute it and/or
  14 * modify it under the terms of the GNU Lesser General Public
  15 * License as published by the Free Software Foundation; either
  16 * version 2 of the License, or (at your option) any later version.
  17 *
  18 * This library is distributed in the hope that it will be useful,
  19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 * Lesser General Public License for more details.
  22 *
  23 * You should have received a copy of the GNU Lesser General Public
  24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  25 */
  26
  27
  28#include "qemu/osdep.h"
  29#include "hw/hw.h"
  30#include "hw/pci/pci.h"
  31#include "net/net.h"
  32#include "net/checksum.h"
  33#include "hw/loader.h"
  34#include "sysemu/sysemu.h"
  35#include "sysemu/dma.h"
  36#include "qemu/iov.h"
  37#include "qemu/range.h"
  38
  39#include "e1000_regs.h"
  40
  41static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
  42
  43#define E1000_DEBUG
  44
  45#ifdef E1000_DEBUG
  46enum {
  47    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
  48    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
  49    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
  50    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
  51};
  52#define DBGBIT(x)    (1<<DEBUG_##x)
  53static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
  54
  55#define DBGOUT(what, fmt, ...) do { \
  56    if (debugflags & DBGBIT(what)) \
  57        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
  58    } while (0)
  59#else
  60#define DBGOUT(what, fmt, ...) do {} while (0)
  61#endif
  62
  63#define IOPORT_SIZE       0x40
  64#define PNPMMIO_SIZE      0x20000
  65#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
  66
  67/* this is the size past which hardware will drop packets when setting LPE=0 */
  68#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
  69/* this is the size past which hardware will drop packets when setting LPE=1 */
  70#define MAXIMUM_ETHERNET_LPE_SIZE 16384
  71
  72#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
  73
  74/*
  75 * HW models:
  76 *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
  77 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
  78 *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
  79 *  Others never tested
  80 */
  81
  82typedef struct E1000State_st {
  83    /*< private >*/
  84    PCIDevice parent_obj;
  85    /*< public >*/
  86
  87    NICState *nic;
  88    NICConf conf;
  89    MemoryRegion mmio;
  90    MemoryRegion io;
  91
  92    uint32_t mac_reg[0x8000];
  93    uint16_t phy_reg[0x20];
  94    uint16_t eeprom_data[64];
  95
  96    uint32_t rxbuf_size;
  97    uint32_t rxbuf_min_shift;
  98    struct e1000_tx {
  99        unsigned char header[256];
 100        unsigned char vlan_header[4];
 101        /* Fields vlan and data must not be reordered or separated. */
 102        unsigned char vlan[4];
 103        unsigned char data[0x10000];
 104        uint16_t size;
 105        unsigned char sum_needed;
 106        unsigned char vlan_needed;
 107        uint8_t ipcss;
 108        uint8_t ipcso;
 109        uint16_t ipcse;
 110        uint8_t tucss;
 111        uint8_t tucso;
 112        uint16_t tucse;
 113        uint8_t hdr_len;
 114        uint16_t mss;
 115        uint32_t paylen;
 116        uint16_t tso_frames;
 117        char tse;
 118        int8_t ip;
 119        int8_t tcp;
 120        char cptse;     // current packet tse bit
 121    } tx;
 122
 123    struct {
 124        uint32_t val_in;    /* shifted in from guest driver */
 125        uint16_t bitnum_in;
 126        uint16_t bitnum_out;
 127        uint16_t reading;
 128        uint32_t old_eecd;
 129    } eecd_state;
 130
 131    QEMUTimer *autoneg_timer;
 132
 133    QEMUTimer *mit_timer;      /* Mitigation timer. */
 134    bool mit_timer_on;         /* Mitigation timer is running. */
 135    bool mit_irq_level;        /* Tracks interrupt pin level. */
 136    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
 137
 138/* Compatibility flags for migration to/from qemu 1.3.0 and older */
 139#define E1000_FLAG_AUTONEG_BIT 0
 140#define E1000_FLAG_MIT_BIT 1
 141#define E1000_FLAG_MAC_BIT 2
 142#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
 143#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
 144#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
 145    uint32_t compat_flags;
 146} E1000State;
 147
 148#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
 149
 150typedef struct E1000BaseClass {
 151    PCIDeviceClass parent_class;
 152    uint16_t phy_id2;
 153} E1000BaseClass;
 154
 155#define TYPE_E1000_BASE "e1000-base"
 156
 157#define E1000(obj) \
 158    OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)
 159
 160#define E1000_DEVICE_CLASS(klass) \
 161     OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
 162#define E1000_DEVICE_GET_CLASS(obj) \
 163    OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
 164
 165#define defreg(x)    x = (E1000_##x>>2)
 166enum {
 167    defreg(CTRL),    defreg(EECD),    defreg(EERD),    defreg(GPRC),
 168    defreg(GPTC),    defreg(ICR),     defreg(ICS),     defreg(IMC),
 169    defreg(IMS),     defreg(LEDCTL),  defreg(MANC),    defreg(MDIC),
 170    defreg(MPC),     defreg(PBA),     defreg(RCTL),    defreg(RDBAH),
 171    defreg(RDBAL),   defreg(RDH),     defreg(RDLEN),   defreg(RDT),
 172    defreg(STATUS),  defreg(SWSM),    defreg(TCTL),    defreg(TDBAH),
 173    defreg(TDBAL),   defreg(TDH),     defreg(TDLEN),   defreg(TDT),
 174    defreg(TORH),    defreg(TORL),    defreg(TOTH),    defreg(TOTL),
 175    defreg(TPR),     defreg(TPT),     defreg(TXDCTL),  defreg(WUFC),
 176    defreg(RA),      defreg(MTA),     defreg(CRCERRS), defreg(VFTA),
 177    defreg(VET),     defreg(RDTR),    defreg(RADV),    defreg(TADV),
 178    defreg(ITR),     defreg(FCRUC),   defreg(TDFH),    defreg(TDFT),
 179    defreg(TDFHS),   defreg(TDFTS),   defreg(TDFPC),   defreg(RDFH),
 180    defreg(RDFT),    defreg(RDFHS),   defreg(RDFTS),   defreg(RDFPC),
 181    defreg(IPAV),    defreg(WUC),     defreg(WUS),     defreg(AIT),
 182    defreg(IP6AT),   defreg(IP4AT),   defreg(FFLT),    defreg(FFMT),
 183    defreg(FFVT),    defreg(WUPM),    defreg(PBM),     defreg(SCC),
 184    defreg(ECOL),    defreg(MCC),     defreg(LATECOL), defreg(COLC),
 185    defreg(DC),      defreg(TNCRS),   defreg(SEC),     defreg(CEXTERR),
 186    defreg(RLEC),    defreg(XONRXC),  defreg(XONTXC),  defreg(XOFFRXC),
 187    defreg(XOFFTXC), defreg(RFC),     defreg(RJC),     defreg(RNBC),
 188    defreg(TSCTFC),  defreg(MGTPRC),  defreg(MGTPDC),  defreg(MGTPTC),
 189    defreg(RUC),     defreg(ROC),     defreg(GORCL),   defreg(GORCH),
 190    defreg(GOTCL),   defreg(GOTCH),   defreg(BPRC),    defreg(MPRC),
 191    defreg(TSCTC),   defreg(PRC64),   defreg(PRC127),  defreg(PRC255),
 192    defreg(PRC511),  defreg(PRC1023), defreg(PRC1522), defreg(PTC64),
 193    defreg(PTC127),  defreg(PTC255),  defreg(PTC511),  defreg(PTC1023),
 194    defreg(PTC1522), defreg(MPTC),    defreg(BPTC)
 195};
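
/*
 * What defreg() does, in short: the E1000_x constants from e1000_regs.h are
 * byte offsets into the register space, and mac_reg[] holds one uint32_t per
 * 4-byte slot, so dropping the low two bits gives the array index.  For
 * example E1000_RCTL (byte offset 0x100) becomes RCTL == 0x40, i.e. the
 * register is read and written as mac_reg[0x40].
 */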
 196
 197static void
 198e1000_link_down(E1000State *s)
 199{
 200    s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
 201    s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
 202    s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
 203    s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK;
 204}
 205
 206static void
 207e1000_link_up(E1000State *s)
 208{
 209    s->mac_reg[STATUS] |= E1000_STATUS_LU;
 210    s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
 211
 212    /* E1000_STATUS_LU is tested by e1000_can_receive() */
 213    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 214}
 215
 216static bool
 217have_autoneg(E1000State *s)
 218{
 219    return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
 220}
 221
 222static void
 223set_phy_ctrl(E1000State *s, int index, uint16_t val)
 224{
 225    /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
 226    s->phy_reg[PHY_CTRL] = val & ~(0x3f |
 227                                   MII_CR_RESET |
 228                                   MII_CR_RESTART_AUTO_NEG);
 229
 230    /*
 231     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
 232     * migrate during auto negotiation, after migration the link will be
 233     * down.
 234     */
 235    if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
 236        e1000_link_down(s);
 237        DBGOUT(PHY, "Start link auto negotiation\n");
 238        timer_mod(s->autoneg_timer,
 239                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
 240    }
 241}
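
/*
 * Guest-visible effect of restarting auto-negotiation (with the AUTONEG
 * compat flag set and MII_CR_AUTO_NEG_EN enabled): e1000_link_down() clears
 * STATUS.LU and the PHY link/autoneg-complete bits, and about 500 ms of
 * virtual time later e1000_autoneg_timer() restores them and raises an
 * E1000_ICS_LSC interrupt, so the guest observes a brief link flap.
 */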
 242
 243static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
 244    [PHY_CTRL] = set_phy_ctrl,
 245};
 246
 247enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
 248
 249enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
 250static const char phy_regcap[0x20] = {
 251    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
 252    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
 253    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
 254    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
 255    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
 256    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
 257    [PHY_AUTONEG_EXP] = PHY_R,
 258};
 259
 260/* PHY_ID2 documented in 8254x_GBe_SDM.pdf, p. 250 */
 261static const uint16_t phy_reg_init[] = {
 262    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
 263                   MII_CR_FULL_DUPLEX |
 264                   MII_CR_AUTO_NEG_EN,
 265
 266    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
 267                   MII_SR_LINK_STATUS |   /* link initially up */
 268                   MII_SR_AUTONEG_CAPS |
 269                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
 270                   MII_SR_PREAMBLE_SUPPRESS |
 271                   MII_SR_EXTENDED_STATUS |
 272                   MII_SR_10T_HD_CAPS |
 273                   MII_SR_10T_FD_CAPS |
 274                   MII_SR_100X_HD_CAPS |
 275                   MII_SR_100X_FD_CAPS,
 276
 277    [PHY_ID1] = 0x141,
 278    /* [PHY_ID2] configured per DevId, from e1000_reset() */
 279    [PHY_AUTONEG_ADV] = 0xde1,
 280    [PHY_LP_ABILITY] = 0x1e0,
 281    [PHY_1000T_CTRL] = 0x0e00,
 282    [PHY_1000T_STATUS] = 0x3c00,
 283    [M88E1000_PHY_SPEC_CTRL] = 0x360,
 284    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
 285    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
 286};
 287
 288static const uint32_t mac_reg_init[] = {
 289    [PBA]     = 0x00100030,
 290    [LEDCTL]  = 0x602,
 291    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
 292                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
 293    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
 294                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
 295                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
 296                E1000_STATUS_LU,
 297    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
 298                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
 299                E1000_MANC_RMCP_EN,
 300};
 301
 302/* Helper function: *curr == 0 means the value is not set */
 303static inline void
 304mit_update_delay(uint32_t *curr, uint32_t value)
 305{
 306    if (value && (*curr == 0 || value < *curr)) {
 307        *curr = value;
 308    }
 309}
 310
 311static void
 312set_interrupt_cause(E1000State *s, int index, uint32_t val)
 313{
 314    PCIDevice *d = PCI_DEVICE(s);
 315    uint32_t pending_ints;
 316    uint32_t mit_delay;
 317
 318    s->mac_reg[ICR] = val;
 319
 320    /*
 321     * Make sure ICR and ICS registers have the same value.
 322     * The spec says that the ICS register is write-only.  However in practice,
 323     * on real hardware ICS is readable, and for reads it has the same value as
 324     * ICR (except that ICS does not have the clear on read behaviour of ICR).
 325     *
 326     * The VxWorks PRO/1000 driver uses this behaviour.
 327     */
 328    s->mac_reg[ICS] = val;
 329
 330    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
 331    if (!s->mit_irq_level && pending_ints) {
 332        /*
 333         * Here we detect a potential rising edge. We postpone raising the
 334         * interrupt line if we are inside the mitigation delay window
 335         * (s->mit_timer_on == 1).
 336         * We provide a partial implementation of interrupt mitigation,
 337         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
 338         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
 339         * RADV; relative timers based on TIDV and RDTR are not implemented.
 340         */
 341        if (s->mit_timer_on) {
 342            return;
 343        }
 344        if (chkflag(MIT)) {
 345            /* Compute the next mitigation delay according to pending
 346             * interrupts and the current values of RADV (provided
 347             * RDTR!=0), TADV and ITR.
 348             * Then rearm the timer.
 349             */
 350            mit_delay = 0;
 351            if (s->mit_ide &&
 352                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
 353                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
 354            }
 355            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
 356                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
 357            }
 358            mit_update_delay(&mit_delay, s->mac_reg[ITR]);
 359
 360            /*
 361             * According to the e1000 spec, the Ethernet controller guarantees
 362             * a maximum observable interrupt rate of 7813 interrupts/sec.
 363             * Thus if mit_delay < 500 then the delay should be set to the
 364             * minimum delay possible which is 500.
 365             */
 366            mit_delay = (mit_delay < 500) ? 500 : mit_delay;
 367
 368            if (mit_delay) {
 369                s->mit_timer_on = 1;
 370                timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 371                          mit_delay * 256);
 372            }
 373            s->mit_ide = 0;
 374        }
 375    }
 376
 377    s->mit_irq_level = (pending_ints != 0);
 378    pci_set_irq(d, s->mit_irq_level);
 379}
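
/*
 * A minimal sketch of the mitigation-delay arithmetic used above, ignoring
 * the pending-interrupt and RDTR gating: ITR is already in 256 ns units,
 * RADV and TADV are in 1024 ns units (hence the "* 4"), and the result is
 * clamped to at least 500 * 256 ns = 128 us, i.e. at most ~7813 irq/s.
 * Illustrative only, never called by the device model.
 */
#if 0
static int64_t mit_delay_to_ns(uint32_t itr, uint32_t radv, uint32_t tadv)
{
    uint32_t delay = 0;                 /* accumulated in 256 ns units */

    mit_update_delay(&delay, tadv * 4); /* TADV: 1024 ns units */
    mit_update_delay(&delay, radv * 4); /* RADV: 1024 ns units */
    mit_update_delay(&delay, itr);      /* ITR:   256 ns units */
    if (delay < 500) {
        delay = 500;
    }
    return (int64_t)delay * 256;        /* what timer_mod() adds to "now" */
}
#endif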
 380
 381static void
 382e1000_mit_timer(void *opaque)
 383{
 384    E1000State *s = opaque;
 385
 386    s->mit_timer_on = 0;
 387    /* Call set_interrupt_cause to update the irq level (if necessary). */
 388    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
 389}
 390
 391static void
 392set_ics(E1000State *s, int index, uint32_t val)
 393{
 394    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
 395        s->mac_reg[IMS]);
 396    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
 397}
 398
 399static void
 400e1000_autoneg_timer(void *opaque)
 401{
 402    E1000State *s = opaque;
 403    if (!qemu_get_queue(s->nic)->link_down) {
 404        e1000_link_up(s);
 405        s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK;
 406        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
 407        DBGOUT(PHY, "Auto negotiation is completed\n");
 408        set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
 409    }
 410}
 411
 412static int
 413rxbufsize(uint32_t v)
 414{
 415    v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
 416         E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
 417         E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
 418    switch (v) {
 419    case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
 420        return 16384;
 421    case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
 422        return 8192;
 423    case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
 424        return 4096;
 425    case E1000_RCTL_SZ_1024:
 426        return 1024;
 427    case E1000_RCTL_SZ_512:
 428        return 512;
 429    case E1000_RCTL_SZ_256:
 430        return 256;
 431    }
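    /* E1000_RCTL_SZ_2048 (BSIZE = 00) encodes as zero, so any setting not
     * matched above falls through to the default 2048-byte buffer. */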
 432    return 2048;
 433}
 434
 435static void e1000_reset(void *opaque)
 436{
 437    E1000State *d = opaque;
 438    E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
 439    uint8_t *macaddr = d->conf.macaddr.a;
 440    int i;
 441
 442    timer_del(d->autoneg_timer);
 443    timer_del(d->mit_timer);
 444    d->mit_timer_on = 0;
 445    d->mit_irq_level = 0;
 446    d->mit_ide = 0;
 447    memset(d->phy_reg, 0, sizeof d->phy_reg);
 448    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
 449    d->phy_reg[PHY_ID2] = edc->phy_id2;
 450    memset(d->mac_reg, 0, sizeof d->mac_reg);
 451    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
 452    d->rxbuf_min_shift = 1;
 453    memset(&d->tx, 0, sizeof d->tx);
 454
 455    if (qemu_get_queue(d->nic)->link_down) {
 456        e1000_link_down(d);
 457    }
 458
 459    /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
 460    d->mac_reg[RA] = 0;
 461    d->mac_reg[RA + 1] = E1000_RAH_AV;
 462    for (i = 0; i < 4; i++) {
 463        d->mac_reg[RA] |= macaddr[i] << (8 * i);
 464        d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
 465    }
 466    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
 467}
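
/*
 * Receive-address layout produced by the loop above, shown for the
 * (hypothetical) MAC address 52:54:00:12:34:56:
 *
 *   mac_reg[RA]     (RAL0) = 0x12005452             first four octets, LE
 *   mac_reg[RA + 1] (RAH0) = E1000_RAH_AV | 0x5634  last two octets + valid
 */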
 468
 469static void
 470set_ctrl(E1000State *s, int index, uint32_t val)
 471{
 472    /* RST is self clearing */
 473    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
 474}
 475
 476static void
 477set_rx_control(E1000State *s, int index, uint32_t val)
 478{
 479    s->mac_reg[RCTL] = val;
 480    s->rxbuf_size = rxbufsize(val);
 481    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
 482    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
 483           s->mac_reg[RCTL]);
 484    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 485}
 486
 487static void
 488set_mdic(E1000State *s, int index, uint32_t val)
 489{
 490    uint32_t data = val & E1000_MDIC_DATA_MASK;
 491    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
 492
 493    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
 494        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
 495    else if (val & E1000_MDIC_OP_READ) {
 496        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
 497        if (!(phy_regcap[addr] & PHY_R)) {
 498            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
 499            val |= E1000_MDIC_ERROR;
 500        } else
 501            val = (val ^ data) | s->phy_reg[addr];
 502    } else if (val & E1000_MDIC_OP_WRITE) {
 503        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
 504        if (!(phy_regcap[addr] & PHY_W)) {
 505            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
 506            val |= E1000_MDIC_ERROR;
 507        } else {
 508            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
 509                phyreg_writeops[addr](s, index, data);
 510            } else {
 511                s->phy_reg[addr] = data;
 512            }
 513        }
 514    }
 515    s->mac_reg[MDIC] = val | E1000_MDIC_READY;
 516
 517    if (val & E1000_MDIC_INT_EN) {
 518        set_ics(s, 0, E1000_ICR_MDAC);
 519    }
 520}
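
/*
 * A minimal sketch of how a guest driver talks to the PHY through MDIC as
 * modelled above: write an OP/PHY/REG command word, poll for
 * E1000_MDIC_READY (asserted immediately by set_mdic()), then read the data
 * field.  The PHY address must be 1, otherwise the access is answered with
 * E1000_MDIC_ERROR.  "regs" is a hypothetical pointer to the memory-mapped
 * register BAR viewed as 32-bit words; illustrative only.
 */
#if 0
static uint16_t guest_mdic_read(volatile uint32_t *regs, unsigned int phyreg)
{
    uint32_t mdic;

    regs[MDIC] = E1000_MDIC_OP_READ |
                 (1 << E1000_MDIC_PHY_SHIFT) |
                 (phyreg << E1000_MDIC_REG_SHIFT);
    do {
        mdic = regs[MDIC];
    } while (!(mdic & E1000_MDIC_READY));
    return mdic & E1000_MDIC_DATA_MASK;
}
#endif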
 521
 522static uint32_t
 523get_eecd(E1000State *s, int index)
 524{
 525    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
 526
 527    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
 528           s->eecd_state.bitnum_out, s->eecd_state.reading);
 529    if (!s->eecd_state.reading ||
 530        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
 531          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
 532        ret |= E1000_EECD_DO;
 533    return ret;
 534}
 535
 536static void
 537set_eecd(E1000State *s, int index, uint32_t val)
 538{
 539    uint32_t oldval = s->eecd_state.old_eecd;
 540
 541    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
 542            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
 543    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
 544        return;
 545    }
 546    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
 547        s->eecd_state.val_in = 0;
 548        s->eecd_state.bitnum_in = 0;
 549        s->eecd_state.bitnum_out = 0;
 550        s->eecd_state.reading = 0;
 551    }
 552    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
 553        return;
 554    }
 555    if (!(E1000_EECD_SK & val)) {               /* falling edge */
 556        s->eecd_state.bitnum_out++;
 557        return;
 558    }
 559    s->eecd_state.val_in <<= 1;
 560    if (val & E1000_EECD_DI)
 561        s->eecd_state.val_in |= 1;
 562    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
 563        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
 564        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
 565            EEPROM_READ_OPCODE_MICROWIRE);
 566    }
 567    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
 568           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
 569           s->eecd_state.reading);
 570}
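
/*
 * get_eecd()/set_eecd() bit-bang the Microwire EEPROM protocol: with CS
 * asserted, nine bits are clocked in on rising SK edges (a 3-bit opcode
 * including the start bit, then a 6-bit word address); for
 * EEPROM_READ_OPCODE_MICROWIRE the addressed 16-bit word is then shifted out
 * MSB first on E1000_EECD_DO, one bit per falling SK edge.  A minimal
 * guest-side sketch ("regs" again a hypothetical pointer to the 32-bit
 * register window); illustrative only:
 */
#if 0
static uint16_t guest_eeprom_read(volatile uint32_t *regs, unsigned int addr)
{
    uint32_t cmd = (EEPROM_READ_OPCODE_MICROWIRE << 6) | (addr & 0x3f);
    uint16_t word = 0;
    int i;

    regs[EECD] = E1000_EECD_CS;                           /* select chip */
    for (i = 8; i >= 0; i--) {                            /* 9 command bits */
        uint32_t di = (cmd >> i) & 1 ? E1000_EECD_DI : 0;
        regs[EECD] = E1000_EECD_CS | di;                  /* SK low */
        regs[EECD] = E1000_EECD_CS | di | E1000_EECD_SK;  /* latch on rise */
    }
    for (i = 0; i < 16; i++) {                            /* 16 data bits */
        regs[EECD] = E1000_EECD_CS;                       /* next bit on fall */
        word = (word << 1) | !!(regs[EECD] & E1000_EECD_DO);
        regs[EECD] = E1000_EECD_CS | E1000_EECD_SK;
    }
    regs[EECD] = 0;                                       /* deselect */
    return word;
}
#endif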
 571
 572static uint32_t
 573flash_eerd_read(E1000State *s, int x)
 574{
 575    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
 576
 577    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
 578        return (s->mac_reg[EERD]);
 579
 580    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
 581        return (E1000_EEPROM_RW_REG_DONE | r);
 582
 583    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
 584           E1000_EEPROM_RW_REG_DONE | r);
 585}
 586
 587static void
 588putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
 589{
 590    uint32_t sum;
 591
 592    if (cse && cse < n)
 593        n = cse + 1;
 594    if (sloc < n-1) {
 595        sum = net_checksum_add(n-css, data+css);
 596        stw_be_p(data + sloc, net_checksum_finish(sum));
 597    }
 598}
 599
 600static inline void
 601inc_reg_if_not_full(E1000State *s, int index)
 602{
 603    if (s->mac_reg[index] != 0xffffffff) {
 604        s->mac_reg[index]++;
 605    }
 606}
 607
 608static inline void
 609inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
 610{
 611    if (!memcmp(arr, bcast, sizeof bcast)) {
 612        inc_reg_if_not_full(s, BPTC);
 613    } else if (arr[0] & 1) {
 614        inc_reg_if_not_full(s, MPTC);
 615    }
 616}
 617
 618static void
 619grow_8reg_if_not_full(E1000State *s, int index, int size)
 620{
 621    uint64_t sum = s->mac_reg[index] | (uint64_t)s->mac_reg[index+1] << 32;
 622
 623    if (sum + size < sum) {
 624        sum = ~0ULL;
 625    } else {
 626        sum += size;
 627    }
 628    s->mac_reg[index] = sum;
 629    s->mac_reg[index+1] = sum >> 32;
 630}
 631
 632static void
 633increase_size_stats(E1000State *s, const int *size_regs, int size)
 634{
 635    if (size > 1023) {
 636        inc_reg_if_not_full(s, size_regs[5]);
 637    } else if (size > 511) {
 638        inc_reg_if_not_full(s, size_regs[4]);
 639    } else if (size > 255) {
 640        inc_reg_if_not_full(s, size_regs[3]);
 641    } else if (size > 127) {
 642        inc_reg_if_not_full(s, size_regs[2]);
 643    } else if (size > 64) {
 644        inc_reg_if_not_full(s, size_regs[1]);
 645    } else if (size == 64) {
 646        inc_reg_if_not_full(s, size_regs[0]);
 647    }
 648}
 649
 650static inline int
 651vlan_enabled(E1000State *s)
 652{
 653    return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
 654}
 655
 656static inline int
 657vlan_rx_filter_enabled(E1000State *s)
 658{
 659    return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
 660}
 661
 662static inline int
 663is_vlan_packet(E1000State *s, const uint8_t *buf)
 664{
 665    return (be16_to_cpup((uint16_t *)(buf + 12)) ==
 666                le16_to_cpu(s->mac_reg[VET]));
 667}
 668
 669static inline int
 670is_vlan_txd(uint32_t txd_lower)
 671{
 672    return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
 673}
 674
 675/* FCS aka Ethernet CRC-32. We don't get it from backends and can't
 676 * fill it in, so just pad the descriptor length by 4 bytes unless the
 677 * guest told us to strip it off the packet. */
 678static inline int
 679fcs_len(E1000State *s)
 680{
 681    return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
 682}
 683
 684static void
 685e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
 686{
 687    static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
 688                                    PTC1023, PTC1522 };
 689
 690    NetClientState *nc = qemu_get_queue(s->nic);
 691    if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
 692        nc->info->receive(nc, buf, size);
 693    } else {
 694        qemu_send_packet(nc, buf, size);
 695    }
 696    inc_tx_bcast_or_mcast_count(s, buf);
 697    increase_size_stats(s, PTCregs, size);
 698}
 699
 700static void
 701xmit_seg(E1000State *s)
 702{
 703    uint16_t len, *sp;
 704    unsigned int frames = s->tx.tso_frames, css, sofar;
 705    struct e1000_tx *tp = &s->tx;
 706
 707    if (tp->tse && tp->cptse) {
 708        css = tp->ipcss;
 709        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
 710               frames, tp->size, css);
 711        if (tp->ip) {    /* IPv4 */
 712            stw_be_p(tp->data+css+2, tp->size - css);
 713            stw_be_p(tp->data+css+4,
 714                     be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
 715        } else {         /* IPv6 */
 716            stw_be_p(tp->data+css+4, tp->size - css);
 717        }
 718        css = tp->tucss;
 719        len = tp->size - css;
 720        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
 721        if (tp->tcp) {
 722            sofar = frames * tp->mss;
 723            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
 724            if (tp->paylen - sofar > tp->mss) {
 725                tp->data[css + 13] &= ~9;    /* PSH, FIN */
 726            } else if (frames) {
 727                inc_reg_if_not_full(s, TSCTC);
 728            }
 729        } else    /* UDP */
 730            stw_be_p(tp->data+css+4, len);
 731        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
 732            unsigned int phsum;
 733            // add pseudo-header length before checksum calculation
 734            sp = (uint16_t *)(tp->data + tp->tucso);
 735            phsum = be16_to_cpup(sp) + len;
 736            phsum = (phsum >> 16) + (phsum & 0xffff);
 737            stw_be_p(sp, phsum);
 738        }
 739        tp->tso_frames++;
 740    }
 741
 742    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
 743        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
 744    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
 745        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
 746    if (tp->vlan_needed) {
 747        memmove(tp->vlan, tp->data, 4);
 748        memmove(tp->data, tp->data + 4, 8);
 749        memcpy(tp->data + 8, tp->vlan_header, 4);
 750        e1000_send_packet(s, tp->vlan, tp->size + 4);
 751    } else {
 752        e1000_send_packet(s, tp->data, tp->size);
 753    }
 754
 755    inc_reg_if_not_full(s, TPT);
 756    grow_8reg_if_not_full(s, TOTL, s->tx.size);
 757    s->mac_reg[GPTC] = s->mac_reg[TPT];
 758    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
 759    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
 760}
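
/*
 * Per-segment fixups performed above when TSO is in effect: the IPv4 total
 * length (or IPv6 payload length) is rewritten for this segment and the IPv4
 * identification field advanced by the number of segments already sent; for
 * TCP the sequence number is advanced by frames * MSS and PSH/FIN are
 * cleared on all but the last segment, while for UDP the length field is
 * rewritten; finally the pseudo-header checksum the guest seeded at tucso is
 * adjusted by the segment length before putsum() computes the final sum.
 */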
 761
 762static void
 763process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
 764{
 765    PCIDevice *d = PCI_DEVICE(s);
 766    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
 767    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
 768    unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
 769    unsigned int msh = 0xfffff;
 770    uint64_t addr;
 771    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
 772    struct e1000_tx *tp = &s->tx;
 773
 774    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
 775    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
 776        op = le32_to_cpu(xp->cmd_and_length);
 777        tp->ipcss = xp->lower_setup.ip_fields.ipcss;
 778        tp->ipcso = xp->lower_setup.ip_fields.ipcso;
 779        tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
 780        tp->tucss = xp->upper_setup.tcp_fields.tucss;
 781        tp->tucso = xp->upper_setup.tcp_fields.tucso;
 782        tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
 783        tp->paylen = op & 0xfffff;
 784        tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
 785        tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
 786        tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
 787        tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
 788        tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
 789        tp->tso_frames = 0;
 790        if (tp->tucso == 0) {    /* this is probably wrong */
 791            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
 792            tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
 793        }
 794        return;
 795    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
 796        // data descriptor
 797        if (tp->size == 0) {
 798            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
 799        }
 800        tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
 801    } else {
 802        // legacy descriptor
 803        tp->cptse = 0;
 804    }
 805
 806    if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
 807        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
 808        tp->vlan_needed = 1;
 809        stw_be_p(tp->vlan_header,
 810                      le16_to_cpu(s->mac_reg[VET]));
 811        stw_be_p(tp->vlan_header + 2,
 812                      le16_to_cpu(dp->upper.fields.special));
 813    }
 814
 815    addr = le64_to_cpu(dp->buffer_addr);
 816    if (tp->tse && tp->cptse) {
 817        msh = tp->hdr_len + tp->mss;
 818        do {
 819            bytes = split_size;
 820            if (tp->size + bytes > msh)
 821                bytes = msh - tp->size;
 822
 823            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
 824            pci_dma_read(d, addr, tp->data + tp->size, bytes);
 825            sz = tp->size + bytes;
 826            if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
 827                memmove(tp->header, tp->data, tp->hdr_len);
 828            }
 829            tp->size = sz;
 830            addr += bytes;
 831            if (sz == msh) {
 832                xmit_seg(s);
 833                memmove(tp->data, tp->header, tp->hdr_len);
 834                tp->size = tp->hdr_len;
 835            }
 836            split_size -= bytes;
 837        } while (bytes && split_size);
 838    } else if (!tp->tse && tp->cptse) {
 839        // context descriptor TSE is not set, while data descriptor TSE is set
 840        DBGOUT(TXERR, "TCP segmentation error\n");
 841    } else {
 842        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
 843        pci_dma_read(d, addr, tp->data + tp->size, split_size);
 844        tp->size += split_size;
 845    }
 846
 847    if (!(txd_lower & E1000_TXD_CMD_EOP))
 848        return;
 849    if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
 850        xmit_seg(s);
 851    }
 852    tp->tso_frames = 0;
 853    tp->sum_needed = 0;
 854    tp->vlan_needed = 0;
 855    tp->size = 0;
 856    tp->cptse = 0;
 857}
 858
 859static uint32_t
 860txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
 861{
 862    PCIDevice *d = PCI_DEVICE(s);
 863    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
 864
 865    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
 866        return 0;
 867    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
 868                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
 869    dp->upper.data = cpu_to_le32(txd_upper);
 870    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
 871                  &dp->upper, sizeof(dp->upper));
 872    return E1000_ICR_TXDW;
 873}
 874
 875static uint64_t tx_desc_base(E1000State *s)
 876{
 877    uint64_t bah = s->mac_reg[TDBAH];
 878    uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
 879
 880    return (bah << 32) + bal;
 881}
 882
 883static void
 884start_xmit(E1000State *s)
 885{
 886    PCIDevice *d = PCI_DEVICE(s);
 887    dma_addr_t base;
 888    struct e1000_tx_desc desc;
 889    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
 890
 891    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
 892        DBGOUT(TX, "tx disabled\n");
 893        return;
 894    }
 895
 896    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
 897        base = tx_desc_base(s) +
 898               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
 899        pci_dma_read(d, base, &desc, sizeof(desc));
 900
 901        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
 902               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
 903               desc.upper.data);
 904
 905        process_tx_desc(s, &desc);
 906        cause |= txdesc_writeback(s, base, &desc);
 907
 908        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
 909            s->mac_reg[TDH] = 0;
 910        /*
 911         * the following could happen only if guest sw assigns
 912         * bogus values to TDT/TDLEN.
 913         * there's nothing too intelligent we could do about this.
 914         */
 915        if (s->mac_reg[TDH] == tdh_start ||
 916            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
 917            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
 918                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
 919            break;
 920        }
 921    }
 922    set_ics(s, 0, cause);
 923}
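
/*
 * Transmit-ring bookkeeping in one place: descriptors live at TDBAH:TDBAL,
 * TDH is the next descriptor the device will fetch, TDT the first one still
 * owned by the guest, and both wrap at TDLEN / sizeof(struct e1000_tx_desc).
 * A minimal sketch of how many descriptors the guest has queued;
 * illustrative only, not used by the device model.
 */
#if 0
static unsigned int tx_ring_pending(E1000State *s)
{
    unsigned int num = s->mac_reg[TDLEN] / sizeof(struct e1000_tx_desc);
    unsigned int tdh = s->mac_reg[TDH], tdt = s->mac_reg[TDT];

    return num ? (tdt + num - tdh) % num : 0;
}
#endif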
 924
 925static int
 926receive_filter(E1000State *s, const uint8_t *buf, int size)
 927{
 928    static const int mta_shift[] = {4, 3, 2, 0};
 929    uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
 930    int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
 931
 932    if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
 933        uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
 934        uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
 935                                     ((vid >> 5) & 0x7f));
 936        if ((vfta & (1 << (vid & 0x1f))) == 0)
 937            return 0;
 938    }
 939
 940    if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
 941        return 1;
 942    }
 943
 944    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
 945        inc_reg_if_not_full(s, MPRC);
 946        return 1;
 947    }
 948
 949    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
 950        inc_reg_if_not_full(s, BPRC);
 951        return 1;
 952    }
 953
 954    for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
 955        if (!(rp[1] & E1000_RAH_AV))
 956            continue;
 957        ra[0] = cpu_to_le32(rp[0]);
 958        ra[1] = cpu_to_le32(rp[1]);
 959        if (!memcmp(buf, (uint8_t *)ra, 6)) {
 960            DBGOUT(RXFILTER,
 961                   "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
 962                   (int)(rp - s->mac_reg - RA)/2,
 963                   buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
 964            return 1;
 965        }
 966    }
 967    DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
 968           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
 969
 970    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
 971    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
 972    if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f))) {
 973        inc_reg_if_not_full(s, MPRC);
 974        return 1;
 975    }
 976    DBGOUT(RXFILTER,
 977           "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
 978           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
 979           (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
 980           s->mac_reg[MTA + (f >> 5)]);
 981
 982    return 0;
 983}
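
/*
 * The inexact (Multicast Table Array) lookup above: RCTL.MO selects which 12
 * bits of the last two destination-address octets (buf[4], buf[5]) form the
 * index f, with mta_shift giving the shift for MO = 0..3; f[11:5] then picks
 * one of the 128 MTA dwords and f[4:0] a bit within it, and a set bit
 * accepts the packet and bumps MPRC.
 */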
 984
 985static void
 986e1000_set_link_status(NetClientState *nc)
 987{
 988    E1000State *s = qemu_get_nic_opaque(nc);
 989    uint32_t old_status = s->mac_reg[STATUS];
 990
 991    if (nc->link_down) {
 992        e1000_link_down(s);
 993    } else {
 994        if (have_autoneg(s) &&
 995            !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
 996            /* emulate auto-negotiation if supported */
 997            timer_mod(s->autoneg_timer,
 998                      qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
 999        } else {
1000            e1000_link_up(s);
1001        }
1002    }
1003
1004    if (s->mac_reg[STATUS] != old_status)
1005        set_ics(s, 0, E1000_ICR_LSC);
1006}
1007
1008static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
1009{
1010    int bufs;
1011    /* Fast-path short packets */
1012    if (total_size <= s->rxbuf_size) {
1013        return s->mac_reg[RDH] != s->mac_reg[RDT];
1014    }
1015    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
1016        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
1017    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
1018        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
1019            s->mac_reg[RDT] - s->mac_reg[RDH];
1020    } else {
1021        return false;
1022    }
1023    return total_size <= bufs * s->rxbuf_size;
1024}
1025
1026static int
1027e1000_can_receive(NetClientState *nc)
1028{
1029    E1000State *s = qemu_get_nic_opaque(nc);
1030
1031    return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
1032        (s->mac_reg[RCTL] & E1000_RCTL_EN) &&
1033        (s->parent_obj.config[PCI_COMMAND] & PCI_COMMAND_MASTER) &&
1034        e1000_has_rxbufs(s, 1);
1035}
1036
1037static uint64_t rx_desc_base(E1000State *s)
1038{
1039    uint64_t bah = s->mac_reg[RDBAH];
1040    uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
1041
1042    return (bah << 32) + bal;
1043}
1044
1045static ssize_t
1046e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
1047{
1048    E1000State *s = qemu_get_nic_opaque(nc);
1049    PCIDevice *d = PCI_DEVICE(s);
1050    struct e1000_rx_desc desc;
1051    dma_addr_t base;
1052    unsigned int n, rdt;
1053    uint32_t rdh_start;
1054    uint16_t vlan_special = 0;
1055    uint8_t vlan_status = 0;
1056    uint8_t min_buf[MIN_BUF_SIZE];
1057    struct iovec min_iov;
1058    uint8_t *filter_buf = iov->iov_base;
1059    size_t size = iov_size(iov, iovcnt);
1060    size_t iov_ofs = 0;
1061    size_t desc_offset;
1062    size_t desc_size;
1063    size_t total_size;
1064    static const int PRCregs[6] = { PRC64, PRC127, PRC255, PRC511,
1065                                    PRC1023, PRC1522 };
1066
1067    if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
1068        return -1;
1069    }
1070
1071    if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
1072        return -1;
1073    }
1074
1075    /* Pad to minimum Ethernet frame length */
1076    if (size < sizeof(min_buf)) {
1077        iov_to_buf(iov, iovcnt, 0, min_buf, size);
1078        memset(&min_buf[size], 0, sizeof(min_buf) - size);
1079        inc_reg_if_not_full(s, RUC);
1080        min_iov.iov_base = filter_buf = min_buf;
1081        min_iov.iov_len = size = sizeof(min_buf);
1082        iovcnt = 1;
1083        iov = &min_iov;
1084    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
1085        /* This is very unlikely, but may happen. */
1086        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
1087        filter_buf = min_buf;
1088    }
1089
1090    /* Discard oversized packets if !LPE and !SBP. */
1091    if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
1092        (size > MAXIMUM_ETHERNET_VLAN_SIZE
1093        && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
1094        && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
1095        inc_reg_if_not_full(s, ROC);
1096        return size;
1097    }
1098
1099    if (!receive_filter(s, filter_buf, size)) {
1100        return size;
1101    }
1102
1103    if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
1104        vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
1105                                                                + 14)));
1106        iov_ofs = 4;
1107        if (filter_buf == iov->iov_base) {
1108            memmove(filter_buf + 4, filter_buf, 12);
1109        } else {
1110            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
1111            while (iov->iov_len <= iov_ofs) {
1112                iov_ofs -= iov->iov_len;
1113                iov++;
1114            }
1115        }
1116        vlan_status = E1000_RXD_STAT_VP;
1117        size -= 4;
1118    }
1119
1120    rdh_start = s->mac_reg[RDH];
1121    desc_offset = 0;
1122    total_size = size + fcs_len(s);
1123    if (!e1000_has_rxbufs(s, total_size)) {
1124            set_ics(s, 0, E1000_ICS_RXO);
1125            return -1;
1126    }
1127    do {
1128        desc_size = total_size - desc_offset;
1129        if (desc_size > s->rxbuf_size) {
1130            desc_size = s->rxbuf_size;
1131        }
1132        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
1133        pci_dma_read(d, base, &desc, sizeof(desc));
1134        desc.special = vlan_special;
1135        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
1136        if (desc.buffer_addr) {
1137            if (desc_offset < size) {
1138                size_t iov_copy;
1139                hwaddr ba = le64_to_cpu(desc.buffer_addr);
1140                size_t copy_size = size - desc_offset;
1141                if (copy_size > s->rxbuf_size) {
1142                    copy_size = s->rxbuf_size;
1143                }
1144                do {
1145                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
1146                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
1147                    copy_size -= iov_copy;
1148                    ba += iov_copy;
1149                    iov_ofs += iov_copy;
1150                    if (iov_ofs == iov->iov_len) {
1151                        iov++;
1152                        iov_ofs = 0;
1153                    }
1154                } while (copy_size);
1155            }
1156            desc_offset += desc_size;
1157            desc.length = cpu_to_le16(desc_size);
1158            if (desc_offset >= total_size) {
1159                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1160            } else {
1161                /* Guest zeroing out status is not a hardware requirement.
1162                   Clear EOP in case guest didn't do it. */
1163                desc.status &= ~E1000_RXD_STAT_EOP;
1164            }
1165        } else { // as per intel docs; skip descriptors with null buf addr
1166            DBGOUT(RX, "Null RX descriptor!!\n");
1167        }
1168        pci_dma_write(d, base, &desc, sizeof(desc));
1169
1170        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1171            s->mac_reg[RDH] = 0;
1172        /* see comment in start_xmit; same here */
1173        if (s->mac_reg[RDH] == rdh_start ||
1174            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
1175            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1176                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1177            set_ics(s, 0, E1000_ICS_RXO);
1178            return -1;
1179        }
1180    } while (desc_offset < total_size);
1181
1182    increase_size_stats(s, PRCregs, total_size);
1183    inc_reg_if_not_full(s, TPR);
1184    s->mac_reg[GPRC] = s->mac_reg[TPR];
1185    /* TOR - Total Octets Received:
1186     * This register includes bytes received in a packet from the <Destination
1187     * Address> field through the <CRC> field, inclusively.
1188     * Always include FCS length (4) in size.
1189     */
1190    grow_8reg_if_not_full(s, TORL, size+4);
1191    s->mac_reg[GORCL] = s->mac_reg[TORL];
1192    s->mac_reg[GORCH] = s->mac_reg[TORH];
1193
1194    n = E1000_ICS_RXT0;
1195    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1196        rdt += s->mac_reg[RDLEN] / sizeof(desc);
1197    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1198        s->rxbuf_min_shift)
1199        n |= E1000_ICS_RXDMT0;
1200
1201    set_ics(s, 0, n);
1202
1203    return size;
1204}
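
/*
 * The RXDMT0 decision at the end of the function above implements the
 * receive-descriptor minimum threshold: the interrupt cause is added once
 * the descriptors still owned by hardware occupy no more than
 * RDLEN >> rxbuf_min_shift bytes, i.e. 1/2, 1/4, 1/8 or 1/16 of the ring as
 * selected by RCTL.RDMTS in set_rx_control().
 */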
1205
1206static ssize_t
1207e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1208{
1209    const struct iovec iov = {
1210        .iov_base = (uint8_t *)buf,
1211        .iov_len = size
1212    };
1213
1214    return e1000_receive_iov(nc, &iov, 1);
1215}
1216
1217static uint32_t
1218mac_readreg(E1000State *s, int index)
1219{
1220    return s->mac_reg[index];
1221}
1222
1223static uint32_t
1224mac_low4_read(E1000State *s, int index)
1225{
1226    return s->mac_reg[index] & 0xf;
1227}
1228
1229static uint32_t
1230mac_low11_read(E1000State *s, int index)
1231{
1232    return s->mac_reg[index] & 0x7ff;
1233}
1234
1235static uint32_t
1236mac_low13_read(E1000State *s, int index)
1237{
1238    return s->mac_reg[index] & 0x1fff;
1239}
1240
1241static uint32_t
1242mac_low16_read(E1000State *s, int index)
1243{
1244    return s->mac_reg[index] & 0xffff;
1245}
1246
1247static uint32_t
1248mac_icr_read(E1000State *s, int index)
1249{
1250    uint32_t ret = s->mac_reg[ICR];
1251
1252    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1253    set_interrupt_cause(s, 0, 0);
1254    return ret;
1255}
1256
1257static uint32_t
1258mac_read_clr4(E1000State *s, int index)
1259{
1260    uint32_t ret = s->mac_reg[index];
1261
1262    s->mac_reg[index] = 0;
1263    return ret;
1264}
1265
1266static uint32_t
1267mac_read_clr8(E1000State *s, int index)
1268{
1269    uint32_t ret = s->mac_reg[index];
1270
1271    s->mac_reg[index] = 0;
1272    s->mac_reg[index-1] = 0;
1273    return ret;
1274}
1275
1276static void
1277mac_writereg(E1000State *s, int index, uint32_t val)
1278{
1279    uint32_t macaddr[2];
1280
1281    s->mac_reg[index] = val;
1282
1283    if (index == RA + 1) {
1284        macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1285        macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1286        qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1287    }
1288}
1289
1290static void
1291set_rdt(E1000State *s, int index, uint32_t val)
1292{
1293    s->mac_reg[index] = val & 0xffff;
1294    if (e1000_has_rxbufs(s, 1)) {
1295        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1296    }
1297}
1298
1299static void
1300set_16bit(E1000State *s, int index, uint32_t val)
1301{
1302    s->mac_reg[index] = val & 0xffff;
1303}
1304
1305static void
1306set_dlen(E1000State *s, int index, uint32_t val)
1307{
1308    s->mac_reg[index] = val & 0xfff80;
1309}
1310
1311static void
1312set_tctl(E1000State *s, int index, uint32_t val)
1313{
1314    s->mac_reg[index] = val;
1315    s->mac_reg[TDT] &= 0xffff;
1316    start_xmit(s);
1317}
1318
1319static void
1320set_icr(E1000State *s, int index, uint32_t val)
1321{
1322    DBGOUT(INTERRUPT, "set_icr %x\n", val);
1323    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1324}
1325
1326static void
1327set_imc(E1000State *s, int index, uint32_t val)
1328{
1329    s->mac_reg[IMS] &= ~val;
1330    set_ics(s, 0, 0);
1331}
1332
1333static void
1334set_ims(E1000State *s, int index, uint32_t val)
1335{
1336    s->mac_reg[IMS] |= val;
1337    set_ics(s, 0, 0);
1338}
1339
1340#define getreg(x)    [x] = mac_readreg
1341static uint32_t (*macreg_readops[])(E1000State *, int) = {
1342    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
1343    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
1344    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
1345    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
1346    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
1347    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
1348    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
1349    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
1350    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
1351    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
1352    getreg(TNCRS),    getreg(SEC),      getreg(CEXTERR),  getreg(RLEC),
1353    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
1354    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
1355    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
1356    getreg(GOTCL),
1357
1358    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
1359    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
1360    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
1361    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
1362    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
1363    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
1364    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
1365    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
1366    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
1367    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
1368    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
1369    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
1370    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
1371    [MPTC]    = mac_read_clr4,
1372    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
1373    [EERD]    = flash_eerd_read,
1374    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
1375    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
1376    [RDFPC]   = mac_low13_read,
1377    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
1378    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
1379    [TDFPC]   = mac_low13_read,
1380    [AIT]     = mac_low16_read,
1381
1382    [CRCERRS ... MPC]   = &mac_readreg,
1383    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
1384    [FFLT ... FFLT+6]   = &mac_low11_read,
1385    [RA ... RA+31]      = &mac_readreg,
1386    [WUPM ... WUPM+31]  = &mac_readreg,
1387    [MTA ... MTA+127]   = &mac_readreg,
1388    [VFTA ... VFTA+127] = &mac_readreg,
1389    [FFMT ... FFMT+254] = &mac_low4_read,
1390    [FFVT ... FFVT+254] = &mac_readreg,
1391    [PBM ... PBM+16383] = &mac_readreg,
1392};
1393enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1394
1395#define putreg(x)    [x] = mac_writereg
1396static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1397    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
1398    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
1399    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
1400    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
1401    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
1402    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
1403    putreg(WUS),      putreg(AIT),
1404
1405    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
1406    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
1407    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
1408    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
1409    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
1410    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
1411    [ITR]    = set_16bit,
1412
1413    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
1414    [FFLT ... FFLT+6]   = &mac_writereg,
1415    [RA ... RA+31]      = &mac_writereg,
1416    [WUPM ... WUPM+31]  = &mac_writereg,
1417    [MTA ... MTA+127]   = &mac_writereg,
1418    [VFTA ... VFTA+127] = &mac_writereg,
1419    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
1420    [PBM ... PBM+16383] = &mac_writereg,
1421};
1422
1423enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1424
1425enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
1426
1427#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
1428/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
1429 * f - flag bits (up to 6 possible flags)
1430 * n - flag needed
1431 * p - partially implemented */
1432static const uint8_t mac_reg_access[0x8000] = {
1433    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
1434    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),
1435
1436    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
1437    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
1438    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
1439    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
1440    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
1441    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
1442    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
1443    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
1444    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
1445    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
1446    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
1447    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
1448    [SEC]     = markflag(MAC),    [CEXTERR] = markflag(MAC),
1449    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
1450    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
1451    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
1452    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
1453    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
1454    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
1455    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
1456    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
1457    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
1458    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
1459    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
1460    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
1461    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
1462    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
1463    [BPTC]    = markflag(MAC),
1464
1465    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1466    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1467    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1468    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1469    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1470    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1471    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
1472    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1473    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1474    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
1475    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
1476};
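
/*
 * How e1000_mmio_read()/e1000_mmio_write() below decode these entries: bit 0
 * is MAC_ACCESS_PARTIAL, bit 1 is MAC_ACCESS_FLAG_NEEDED, and markflag()
 * stores the required E1000_FLAG_* value in bits [7:2], so
 * (mac_reg_access[index] >> 2) & s->compat_flags is non-zero exactly when
 * the gating compatibility flag is enabled.
 */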
1477
1478static void
1479e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1480                 unsigned size)
1481{
1482    E1000State *s = opaque;
1483    unsigned int index = (addr & 0x1ffff) >> 2;
1484
1485    if (index < NWRITEOPS && macreg_writeops[index]) {
1486        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1487            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1488            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1489                DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1490                       "It is not fully implemented.\n", index<<2);
1491            }
1492            macreg_writeops[index](s, index, val);
1493        } else {    /* "flag needed" bit is set, but the flag is not active */
1494            DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1495                   index<<2);
1496        }
1497    } else if (index < NREADOPS && macreg_readops[index]) {
1498        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1499               index<<2, val);
1500    } else {
1501        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1502               index<<2, val);
1503    }
1504}
1505
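/*
 * Read side of the same dispatch: unknown registers and registers disabled
 * by a missing compat flag simply read as 0.
 */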
1506static uint64_t
1507e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1508{
1509    E1000State *s = opaque;
1510    unsigned int index = (addr & 0x1ffff) >> 2;
1511
1512    if (index < NREADOPS && macreg_readops[index]) {
1513        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1514            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1515            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1516                DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1517                       "It is not fully implemented.\n", index<<2);
1518            }
1519            return macreg_readops[index](s, index);
1520        } else {    /* "flag needed" bit is set, but the flag is not active */
1521            DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1522                   index<<2);
1523        }
1524    } else {
1525        DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1526    }
1527    return 0;
1528}
1529
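/*
 * .impl makes the memory core present every guest access to the callbacks
 * above as an aligned 32-bit operation, splitting or widening smaller and
 * larger accesses as needed.
 */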
1530static const MemoryRegionOps e1000_mmio_ops = {
1531    .read = e1000_mmio_read,
1532    .write = e1000_mmio_write,
1533    .endianness = DEVICE_LITTLE_ENDIAN,
1534    .impl = {
1535        .min_access_size = 4,
1536        .max_access_size = 4,
1537    },
1538};
1539
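/*
 * PCI I/O BAR handlers.  On real 8254x parts this BAR exposes the
 * IOADDR/IODATA indirect register window; here it is only registered so the
 * BAR exists: reads return 0 and writes are discarded.
 */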
1540static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1541                              unsigned size)
1542{
1543    E1000State *s = opaque;
1544
1545    (void)s;
1546    return 0;
1547}
1548
1549static void e1000_io_write(void *opaque, hwaddr addr,
1550                           uint64_t val, unsigned size)
1551{
1552    E1000State *s = opaque;
1553
1554    (void)s;
1555}
1556
1557static const MemoryRegionOps e1000_io_ops = {
1558    .read = e1000_io_read,
1559    .write = e1000_io_write,
1560    .endianness = DEVICE_LITTLE_ENDIAN,
1561};
1562
1563static bool is_version_1(void *opaque, int version_id)
1564{
1565    return version_id == 1;
1566}
1567
1568static void e1000_pre_save(void *opaque)
1569{
1570    E1000State *s = opaque;
1571    NetClientState *nc = qemu_get_queue(s->nic);
1572
1573    /* If the mitigation timer is active, emulate a timeout now. */
1574    if (s->mit_timer_on) {
1575        e1000_mit_timer(s);
1576    }
1577
1578    /*
1579     * If link is down and auto-negotiation is supported and ongoing,
1580     * complete auto-negotiation immediately. This allows us to look
1581     * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1582     */
1583    if (nc->link_down && have_autoneg(s)) {
1584        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1585    }
1586}
1587
1588static int e1000_post_load(void *opaque, int version_id)
1589{
1590    E1000State *s = opaque;
1591    NetClientState *nc = qemu_get_queue(s->nic);
1592
1593    if (!chkflag(MIT)) {
1594        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1595            s->mac_reg[TADV] = 0;
1596        s->mit_irq_level = false;
1597    }
1598    s->mit_ide = 0;
1599    s->mit_timer_on = false;
1600
1601    /* nc.link_down can't be migrated, so infer it from the link-up (LU)
1602     * bit in mac_reg[STATUS].  Alternatively, if auto-negotiation was
1603     * still in progress, restart it here. */
1604    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1605
1606    if (have_autoneg(s) &&
1607        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1608        nc->link_down = false;
1609        timer_mod(s->autoneg_timer,
1610                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1611    }
1612
1613    return 0;
1614}
1615
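/*
 * Subsection predicates: a subsection is only put on the wire when its
 * .needed callback returns true, so clearing the corresponding compat flag
 * keeps the migration stream readable by older QEMUs.
 */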
1616static bool e1000_mit_state_needed(void *opaque)
1617{
1618    E1000State *s = opaque;
1619
1620    return chkflag(MIT);
1621}
1622
1623static bool e1000_full_mac_needed(void *opaque)
1624{
1625    E1000State *s = opaque;
1626
1627    return chkflag(MAC);
1628}
1629
1630static const VMStateDescription vmstate_e1000_mit_state = {
1631    .name = "e1000/mit_state",
1632    .version_id = 1,
1633    .minimum_version_id = 1,
1634    .needed = e1000_mit_state_needed,
1635    .fields = (VMStateField[]) {
1636        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1637        VMSTATE_UINT32(mac_reg[RADV], E1000State),
1638        VMSTATE_UINT32(mac_reg[TADV], E1000State),
1639        VMSTATE_UINT32(mac_reg[ITR], E1000State),
1640        VMSTATE_BOOL(mit_irq_level, E1000State),
1641        VMSTATE_END_OF_LIST()
1642    }
1643};
1644
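/*
 * With the extra_mac_registers flag set, migrate the whole mac_reg array
 * (0x8000 32-bit words, i.e. the full 128 KiB register file) instead of only
 * the registers listed individually in vmstate_e1000 below.
 */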
1645static const VMStateDescription vmstate_e1000_full_mac_state = {
1646    .name = "e1000/full_mac_state",
1647    .version_id = 1,
1648    .minimum_version_id = 1,
1649    .needed = e1000_full_mac_needed,
1650    .fields = (VMStateField[]) {
1651        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
1652        VMSTATE_END_OF_LIST()
1653    }
1654};
1655
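/*
 * Top-level migration state (version 2).  Covers the PCI device, the EEPROM
 * bit-bang state (eecd_state), the in-flight TX offload context and the
 * individually selected MAC registers; the mitigation and full-MAC state
 * above travel as optional subsections.
 */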
1656static const VMStateDescription vmstate_e1000 = {
1657    .name = "e1000",
1658    .version_id = 2,
1659    .minimum_version_id = 1,
1660    .pre_save = e1000_pre_save,
1661    .post_load = e1000_post_load,
1662    .fields = (VMStateField[]) {
1663        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1664        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1665        VMSTATE_UNUSED(4), /* Was mmio_base.  */
1666        VMSTATE_UINT32(rxbuf_size, E1000State),
1667        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1668        VMSTATE_UINT32(eecd_state.val_in, E1000State),
1669        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1670        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1671        VMSTATE_UINT16(eecd_state.reading, E1000State),
1672        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1673        VMSTATE_UINT8(tx.ipcss, E1000State),
1674        VMSTATE_UINT8(tx.ipcso, E1000State),
1675        VMSTATE_UINT16(tx.ipcse, E1000State),
1676        VMSTATE_UINT8(tx.tucss, E1000State),
1677        VMSTATE_UINT8(tx.tucso, E1000State),
1678        VMSTATE_UINT16(tx.tucse, E1000State),
1679        VMSTATE_UINT32(tx.paylen, E1000State),
1680        VMSTATE_UINT8(tx.hdr_len, E1000State),
1681        VMSTATE_UINT16(tx.mss, E1000State),
1682        VMSTATE_UINT16(tx.size, E1000State),
1683        VMSTATE_UINT16(tx.tso_frames, E1000State),
1684        VMSTATE_UINT8(tx.sum_needed, E1000State),
1685        VMSTATE_INT8(tx.ip, E1000State),
1686        VMSTATE_INT8(tx.tcp, E1000State),
1687        VMSTATE_BUFFER(tx.header, E1000State),
1688        VMSTATE_BUFFER(tx.data, E1000State),
1689        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1690        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1691        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1692        VMSTATE_UINT32(mac_reg[EECD], E1000State),
1693        VMSTATE_UINT32(mac_reg[EERD], E1000State),
1694        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1695        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1696        VMSTATE_UINT32(mac_reg[ICR], E1000State),
1697        VMSTATE_UINT32(mac_reg[ICS], E1000State),
1698        VMSTATE_UINT32(mac_reg[IMC], E1000State),
1699        VMSTATE_UINT32(mac_reg[IMS], E1000State),
1700        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1701        VMSTATE_UINT32(mac_reg[MANC], E1000State),
1702        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1703        VMSTATE_UINT32(mac_reg[MPC], E1000State),
1704        VMSTATE_UINT32(mac_reg[PBA], E1000State),
1705        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1706        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1707        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1708        VMSTATE_UINT32(mac_reg[RDH], E1000State),
1709        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1710        VMSTATE_UINT32(mac_reg[RDT], E1000State),
1711        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1712        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1713        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1714        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1715        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1716        VMSTATE_UINT32(mac_reg[TDH], E1000State),
1717        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1718        VMSTATE_UINT32(mac_reg[TDT], E1000State),
1719        VMSTATE_UINT32(mac_reg[TORH], E1000State),
1720        VMSTATE_UINT32(mac_reg[TORL], E1000State),
1721        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1722        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1723        VMSTATE_UINT32(mac_reg[TPR], E1000State),
1724        VMSTATE_UINT32(mac_reg[TPT], E1000State),
1725        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1726        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1727        VMSTATE_UINT32(mac_reg[VET], E1000State),
1728        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1729        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1730        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1731        VMSTATE_END_OF_LIST()
1732    },
1733    .subsections = (const VMStateDescription*[]) {
1734        &vmstate_e1000_mit_state,
1735        &vmstate_e1000_full_mac_state,
1736        NULL
1737    }
1738};
1739
1740/*
1741 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
1742 * Note: A valid DevId will be inserted during pci_e1000_realize().
1743 */
1744static const uint16_t e1000_eeprom_template[64] = {
1745    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1746    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
1747    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1748    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1749    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1750    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1751    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1752    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1753};
1754
1755/* PCI interface */
1756
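/*
 * Map the MMIO BAR and enable coalesced MMIO (batched, lazily flushed
 * writes) for most of the register window.  Registers with immediate side
 * effects (MDIC, the interrupt cause/set/mask registers, TCTL and the TDT
 * transmit doorbell) are excluded, so coalescing is added only for the gaps
 * between them; PNPMMIO_SIZE terminates the list.
 */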
1757static void
1758e1000_mmio_setup(E1000State *d)
1759{
1760    int i;
1761    const uint32_t excluded_regs[] = {
1762        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1763        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1764    };
1765
1766    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1767                          "e1000-mmio", PNPMMIO_SIZE);
1768    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1769    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1770        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1771                                     excluded_regs[i+1] - excluded_regs[i] - 4);
1772    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1773}
1774
1775static void
1776pci_e1000_uninit(PCIDevice *dev)
1777{
1778    E1000State *d = E1000(dev);
1779
1780    timer_del(d->autoneg_timer);
1781    timer_free(d->autoneg_timer);
1782    timer_del(d->mit_timer);
1783    timer_free(d->mit_timer);
1784    qemu_del_nic(d->nic);
1785}
1786
1787static NetClientInfo net_e1000_info = {
1788    .type = NET_CLIENT_OPTIONS_KIND_NIC,
1789    .size = sizeof(NICState),
1790    .can_receive = e1000_can_receive,
1791    .receive = e1000_receive,
1792    .receive_iov = e1000_receive_iov,
1793    .link_status_changed = e1000_set_link_status,
1794};
1795
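/*
 * Once the guest turns on PCI bus mastering the device may DMA received
 * frames into guest memory again, so flush anything the net layer queued
 * while mastering was disabled.
 */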
1796static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1797                                uint32_t val, int len)
1798{
1799    E1000State *s = E1000(pci_dev);
1800
1801    pci_default_write_config(pci_dev, address, val, len);
1802
1803    if (range_covers_byte(address, len, PCI_COMMAND) &&
1804        (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1805        qemu_flush_queued_packets(qemu_get_queue(s->nic));
1806    }
1807}
1808
1809
1810static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1811{
1812    DeviceState *dev = DEVICE(pci_dev);
1813    E1000State *d = E1000(pci_dev);
1814    PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev);
1815    uint8_t *pci_conf;
1816    uint16_t checksum = 0;
1817    int i;
1818    uint8_t *macaddr;
1819
1820    pci_dev->config_write = e1000_write_config;
1821
1822    pci_conf = pci_dev->config;
1823
1824    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1825    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1826
1827    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1828
1829    e1000_mmio_setup(d);
1830
1831    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1832
1833    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1834
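    /*
     * Build the EEPROM image: start from the template, store the MAC address
     * in words 0-2 (low byte in the low half of each 16-bit word), patch the
     * PCI device ID into the two words left as 0 in the template, and set
     * word EEPROM_CHECKSUM_REG so that the 16-bit sum of all 64 words equals
     * EEPROM_SUM.
     */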
1835    memmove(d->eeprom_data, e1000_eeprom_template,
1836        sizeof e1000_eeprom_template);
1837    qemu_macaddr_default_if_unset(&d->conf.macaddr);
1838    macaddr = d->conf.macaddr.a;
1839    for (i = 0; i < 3; i++)
1840        d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1841    d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id;
1842    for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1843        checksum += d->eeprom_data[i];
1844    checksum = (uint16_t) EEPROM_SUM - checksum;
1845    d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1846
1847    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1848                          object_get_typename(OBJECT(d)), dev->id, d);
1849
1850    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1851
1852    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1853    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1854}
1855
1856static void qdev_e1000_reset(DeviceState *dev)
1857{
1858    E1000State *d = E1000(dev);
1859    e1000_reset(d);
1860}
1861
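/*
 * compat_flags properties, typically cleared by older machine types: they
 * disable auto-negotiation, interrupt mitigation (ITR/RDTR/RADV/TADV) or the
 * extra MAC registers guarded by markflag(MAC) above, preserving migration
 * compatibility with older QEMU versions.
 */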
1862static Property e1000_properties[] = {
1863    DEFINE_NIC_PROPERTIES(E1000State, conf),
1864    DEFINE_PROP_BIT("autonegotiation", E1000State,
1865                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1866    DEFINE_PROP_BIT("mitigation", E1000State,
1867                    compat_flags, E1000_FLAG_MIT_BIT, true),
1868    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
1869                    compat_flags, E1000_FLAG_MAC_BIT, true),
1870    DEFINE_PROP_END_OF_LIST(),
1871};
1872
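/*
 * Each emulated variant is described by an E1000Info entry;
 * e1000_class_init() copies its PCI device ID, revision and PHY ID2 into the
 * class so all variants share the same device code.
 */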
1873typedef struct E1000Info {
1874    const char *name;
1875    uint16_t   device_id;
1876    uint8_t    revision;
1877    uint16_t   phy_id2;
1878} E1000Info;
1879
1880static void e1000_class_init(ObjectClass *klass, void *data)
1881{
1882    DeviceClass *dc = DEVICE_CLASS(klass);
1883    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1884    E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1885    const E1000Info *info = data;
1886
1887    k->realize = pci_e1000_realize;
1888    k->exit = pci_e1000_uninit;
1889    k->romfile = "efi-e1000.rom";
1890    k->vendor_id = PCI_VENDOR_ID_INTEL;
1891    k->device_id = info->device_id;
1892    k->revision = info->revision;
1893    e->phy_id2 = info->phy_id2;
1894    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1895    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1896    dc->desc = "Intel Gigabit Ethernet";
1897    dc->reset = qdev_e1000_reset;
1898    dc->vmsd = &vmstate_e1000;
1899    dc->props = e1000_properties;
1900}
1901
1902static void e1000_instance_init(Object *obj)
1903{
1904    E1000State *n = E1000(obj);
1905    device_add_bootindex_property(obj, &n->conf.bootindex,
1906                                  "bootindex", "/ethernet-phy@0",
1907                                  DEVICE(n), NULL);
1908}
1909
1910static const TypeInfo e1000_base_info = {
1911    .name          = TYPE_E1000_BASE,
1912    .parent        = TYPE_PCI_DEVICE,
1913    .instance_size = sizeof(E1000State),
1914    .instance_init = e1000_instance_init,
1915    .class_size    = sizeof(E1000BaseClass),
1916    .abstract      = true,
1917};
1918
1919static const E1000Info e1000_devices[] = {
1920    {
1921        .name      = "e1000",
1922        .device_id = E1000_DEV_ID_82540EM,
1923        .revision  = 0x03,
1924        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1925    },
1926    {
1927        .name      = "e1000-82544gc",
1928        .device_id = E1000_DEV_ID_82544GC_COPPER,
1929        .revision  = 0x03,
1930        .phy_id2   = E1000_PHY_ID2_82544x,
1931    },
1932    {
1933        .name      = "e1000-82545em",
1934        .device_id = E1000_DEV_ID_82545EM_COPPER,
1935        .revision  = 0x03,
1936        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
1937    },
1938};
1939
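/*
 * Register the abstract TYPE_E1000_BASE, then one concrete QOM type per
 * entry in e1000_devices[].
 */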
1940static void e1000_register_types(void)
1941{
1942    int i;
1943
1944    type_register_static(&e1000_base_info);
1945    for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1946        const E1000Info *info = &e1000_devices[i];
1947        TypeInfo type_info = {};
1948
1949        type_info.name = info->name;
1950        type_info.parent = TYPE_E1000_BASE;
1951        type_info.class_data = (void *)info;
1952        type_info.class_init = e1000_class_init;
1953        type_info.instance_init = e1000_instance_init;
1954
1955        type_register(&type_info);
1956    }
1957}
1958
1959type_init(e1000_register_types)
1960