qemu/hw/e1000.c
<<
>>
Prefs
   1/*
   2 * QEMU e1000 emulation
   3 *
   4 * Software developer's manual:
   5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
   6 *
   7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
   8 * Copyright (c) 2008 Qumranet
   9 * Based on work done by:
  10 * Copyright (c) 2007 Dan Aloni
  11 * Copyright (c) 2004 Antony T Curtis
  12 *
  13 * This library is free software; you can redistribute it and/or
  14 * modify it under the terms of the GNU Lesser General Public
  15 * License as published by the Free Software Foundation; either
  16 * version 2 of the License, or (at your option) any later version.
  17 *
  18 * This library is distributed in the hope that it will be useful,
  19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 * Lesser General Public License for more details.
  22 *
  23 * You should have received a copy of the GNU Lesser General Public
  24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  25 */
  26
  27
  28#include "hw.h"
  29#include "pci.h"
  30#include "net.h"
  31#include "net/checksum.h"
  32#include "loader.h"
  33#include "sysemu.h"
  34#include "dma.h"
  35
  36#include "e1000_hw.h"
  37
  38#define E1000_DEBUG
  39
  40#ifdef E1000_DEBUG
  41enum {
  42    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
  43    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
  44    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
  45    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
  46};
  47#define DBGBIT(x)       (1<<DEBUG_##x)
  48static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
  49
  50#define DBGOUT(what, fmt, ...) do { \
  51    if (debugflags & DBGBIT(what)) \
  52        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
  53    } while (0)
  54#else
  55#define DBGOUT(what, fmt, ...) do {} while (0)
  56#endif
  57
  58#define IOPORT_SIZE       0x40
  59#define PNPMMIO_SIZE      0x20000
  60#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
  61
  62/*
  63 * HW models:
  64 *  E1000_DEV_ID_82540EM works with Windows and Linux
  65 *  E1000_DEV_ID_82573L OK with windoze and Linux 2.6.22,
  66 *      appears to perform better than 82540EM, but breaks with Linux 2.6.18
  67 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
  68 *  Others never tested
  69 */
  70enum { E1000_DEVID = E1000_DEV_ID_82540EM };
  71
  72/*
  73 * May need to specify additional MAC-to-PHY entries --
  74 * Intel's Windows driver refuses to initialize unless they match
  75 */
  76enum {
  77    PHY_ID2_INIT = E1000_DEVID == E1000_DEV_ID_82573L ?         0xcc2 :
  78                   E1000_DEVID == E1000_DEV_ID_82544GC_COPPER ? 0xc30 :
  79                   /* default to E1000_DEV_ID_82540EM */        0xc20
  80};
  81
  82typedef struct E1000State_st {
  83    PCIDevice dev;
  84    NICState *nic;
  85    NICConf conf;
  86    MemoryRegion mmio;
  87    MemoryRegion io;
  88
  89    uint32_t mac_reg[0x8000];
  90    uint16_t phy_reg[0x20];
  91    uint16_t eeprom_data[64];
  92
  93    uint32_t rxbuf_size;
  94    uint32_t rxbuf_min_shift;
  95    int check_rxov;
  96    struct e1000_tx {
  97        unsigned char header[256];
  98        unsigned char vlan_header[4];
  99        /* Fields vlan and data must not be reordered or separated. */
 100        unsigned char vlan[4];
 101        unsigned char data[0x10000];
 102        uint16_t size;
 103        unsigned char sum_needed;
 104        unsigned char vlan_needed;
 105        uint8_t ipcss;
 106        uint8_t ipcso;
 107        uint16_t ipcse;
 108        uint8_t tucss;
 109        uint8_t tucso;
 110        uint16_t tucse;
 111        uint8_t hdr_len;
 112        uint16_t mss;
 113        uint32_t paylen;
 114        uint16_t tso_frames;
 115        char tse;
 116        int8_t ip;
 117        int8_t tcp;
 118        char cptse;     // current packet tse bit
 119    } tx;
 120
 121    struct {
 122        uint32_t val_in;        // shifted in from guest driver
 123        uint16_t bitnum_in;
 124        uint16_t bitnum_out;
 125        uint16_t reading;
 126        uint32_t old_eecd;
 127    } eecd_state;
 128
 129    QEMUTimer *autoneg_timer;
 130} E1000State;
 131
 132#define defreg(x)       x = (E1000_##x>>2)
 133enum {
 134    defreg(CTRL),       defreg(EECD),   defreg(EERD),   defreg(GPRC),
 135    defreg(GPTC),       defreg(ICR),    defreg(ICS),    defreg(IMC),
 136    defreg(IMS),        defreg(LEDCTL), defreg(MANC),   defreg(MDIC),
 137    defreg(MPC),        defreg(PBA),    defreg(RCTL),   defreg(RDBAH),
 138    defreg(RDBAL),      defreg(RDH),    defreg(RDLEN),  defreg(RDT),
 139    defreg(STATUS),     defreg(SWSM),   defreg(TCTL),   defreg(TDBAH),
 140    defreg(TDBAL),      defreg(TDH),    defreg(TDLEN),  defreg(TDT),
 141    defreg(TORH),       defreg(TORL),   defreg(TOTH),   defreg(TOTL),
 142    defreg(TPR),        defreg(TPT),    defreg(TXDCTL), defreg(WUFC),
 143    defreg(RA),         defreg(MTA),    defreg(CRCERRS),defreg(VFTA),
 144    defreg(VET),
 145};
 146
 147static void
 148e1000_link_down(E1000State *s)
 149{
 150    s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
 151    s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
 152}
 153
 154static void
 155e1000_link_up(E1000State *s)
 156{
 157    s->mac_reg[STATUS] |= E1000_STATUS_LU;
 158    s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
 159}
 160
 161static void
 162set_phy_ctrl(E1000State *s, int index, uint16_t val)
 163{
 164    if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) {
 165        s->nic->nc.link_down = true;
 166        e1000_link_down(s);
 167        s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
 168        DBGOUT(PHY, "Start link auto negotiation\n");
 169        qemu_mod_timer(s->autoneg_timer, qemu_get_clock_ms(vm_clock) + 500);
 170    }
 171}
 172
 173static void
 174e1000_autoneg_timer(void *opaque)
 175{
 176    E1000State *s = opaque;
 177    s->nic->nc.link_down = false;
 178    e1000_link_up(s);
 179    s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
 180    DBGOUT(PHY, "Auto negotiation is completed\n");
 181}
 182
 183static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
 184    [PHY_CTRL] = set_phy_ctrl,
 185};
 186
 187enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
 188
 189enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
 190static const char phy_regcap[0x20] = {
 191    [PHY_STATUS] = PHY_R,       [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
 192    [PHY_ID1] = PHY_R,          [M88E1000_PHY_SPEC_CTRL] = PHY_RW,
 193    [PHY_CTRL] = PHY_RW,        [PHY_1000T_CTRL] = PHY_RW,
 194    [PHY_LP_ABILITY] = PHY_R,   [PHY_1000T_STATUS] = PHY_R,
 195    [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR] = PHY_R,
 196    [PHY_ID2] = PHY_R,          [M88E1000_PHY_SPEC_STATUS] = PHY_R
 197};
 198
 199static const uint16_t phy_reg_init[] = {
 200    [PHY_CTRL] = 0x1140,
 201    [PHY_STATUS] = 0x794d, /* link initially up with not completed autoneg */
 202    [PHY_ID1] = 0x141,                          [PHY_ID2] = PHY_ID2_INIT,
 203    [PHY_1000T_CTRL] = 0x0e00,                  [M88E1000_PHY_SPEC_CTRL] = 0x360,
 204    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,      [PHY_AUTONEG_ADV] = 0xde1,
 205    [PHY_LP_ABILITY] = 0x1e0,                   [PHY_1000T_STATUS] = 0x3c00,
 206    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
 207};
 208
 209static const uint32_t mac_reg_init[] = {
 210    [PBA] =     0x00100030,
 211    [LEDCTL] =  0x602,
 212    [CTRL] =    E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
 213                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
 214    [STATUS] =  0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
 215                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
 216                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
 217                E1000_STATUS_LU,
 218    [MANC] =    E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
 219                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
 220                E1000_MANC_RMCP_EN,
 221};
 222
 223static void
 224set_interrupt_cause(E1000State *s, int index, uint32_t val)
 225{
 226    if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) {
 227        /* Only for 8257x */
 228        val |= E1000_ICR_INT_ASSERTED;
 229    }
 230    s->mac_reg[ICR] = val;
 231    s->mac_reg[ICS] = val;
 232    qemu_set_irq(s->dev.irq[0], (s->mac_reg[IMS] & s->mac_reg[ICR]) != 0);
 233}
 234
 235static void
 236set_ics(E1000State *s, int index, uint32_t val)
 237{
 238    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
 239        s->mac_reg[IMS]);
 240    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
 241}
 242
 243static int
 244rxbufsize(uint32_t v)
 245{
 246    v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
 247         E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
 248         E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
 249    switch (v) {
 250    case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
 251        return 16384;
 252    case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
 253        return 8192;
 254    case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
 255        return 4096;
 256    case E1000_RCTL_SZ_1024:
 257        return 1024;
 258    case E1000_RCTL_SZ_512:
 259        return 512;
 260    case E1000_RCTL_SZ_256:
 261        return 256;
 262    }
 263    return 2048;
 264}
 265
 266static void e1000_reset(void *opaque)
 267{
 268    E1000State *d = opaque;
 269
 270    qemu_del_timer(d->autoneg_timer);
 271    memset(d->phy_reg, 0, sizeof d->phy_reg);
 272    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
 273    memset(d->mac_reg, 0, sizeof d->mac_reg);
 274    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
 275    d->rxbuf_min_shift = 1;
 276    memset(&d->tx, 0, sizeof d->tx);
 277
 278    if (d->nic->nc.link_down) {
 279        e1000_link_down(d);
 280    }
 281}
 282
 283static void
 284set_ctrl(E1000State *s, int index, uint32_t val)
 285{
 286    /* RST is self clearing */
 287    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
 288}
 289
 290static void
 291set_rx_control(E1000State *s, int index, uint32_t val)
 292{
 293    s->mac_reg[RCTL] = val;
 294    s->rxbuf_size = rxbufsize(val);
 295    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
 296    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
 297           s->mac_reg[RCTL]);
 298}
 299
 300static void
 301set_mdic(E1000State *s, int index, uint32_t val)
 302{
 303    uint32_t data = val & E1000_MDIC_DATA_MASK;
 304    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
 305
 306    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
 307        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
 308    else if (val & E1000_MDIC_OP_READ) {
 309        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
 310        if (!(phy_regcap[addr] & PHY_R)) {
 311            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
 312            val |= E1000_MDIC_ERROR;
 313        } else
 314            val = (val ^ data) | s->phy_reg[addr];
 315    } else if (val & E1000_MDIC_OP_WRITE) {
 316        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
 317        if (!(phy_regcap[addr] & PHY_W)) {
 318            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
 319            val |= E1000_MDIC_ERROR;
 320        } else {
 321            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
 322                phyreg_writeops[addr](s, index, data);
 323            }
 324            s->phy_reg[addr] = data;
 325        }
 326    }
 327    s->mac_reg[MDIC] = val | E1000_MDIC_READY;
 328
 329    if (val & E1000_MDIC_INT_EN) {
 330        set_ics(s, 0, E1000_ICR_MDAC);
 331    }
 332}
 333
 334static uint32_t
 335get_eecd(E1000State *s, int index)
 336{
 337    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
 338
 339    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
 340           s->eecd_state.bitnum_out, s->eecd_state.reading);
 341    if (!s->eecd_state.reading ||
 342        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
 343          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
 344        ret |= E1000_EECD_DO;
 345    return ret;
 346}
 347
 348static void
 349set_eecd(E1000State *s, int index, uint32_t val)
 350{
 351    uint32_t oldval = s->eecd_state.old_eecd;
 352
 353    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
 354            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
 355    if (!(E1000_EECD_CS & val))                 // CS inactive; nothing to do
 356        return;
 357    if (E1000_EECD_CS & (val ^ oldval)) {       // CS rise edge; reset state
 358        s->eecd_state.val_in = 0;
 359        s->eecd_state.bitnum_in = 0;
 360        s->eecd_state.bitnum_out = 0;
 361        s->eecd_state.reading = 0;
 362    }
 363    if (!(E1000_EECD_SK & (val ^ oldval)))      // no clock edge
 364        return;
 365    if (!(E1000_EECD_SK & val)) {               // falling edge
 366        s->eecd_state.bitnum_out++;
 367        return;
 368    }
 369    s->eecd_state.val_in <<= 1;
 370    if (val & E1000_EECD_DI)
 371        s->eecd_state.val_in |= 1;
 372    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
 373        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
 374        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
 375            EEPROM_READ_OPCODE_MICROWIRE);
 376    }
 377    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
 378           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
 379           s->eecd_state.reading);
 380}
 381
 382static uint32_t
 383flash_eerd_read(E1000State *s, int x)
 384{
 385    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
 386
 387    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
 388        return (s->mac_reg[EERD]);
 389
 390    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
 391        return (E1000_EEPROM_RW_REG_DONE | r);
 392
 393    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
 394           E1000_EEPROM_RW_REG_DONE | r);
 395}
 396
 397static void
 398putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
 399{
 400    uint32_t sum;
 401
 402    if (cse && cse < n)
 403        n = cse + 1;
 404    if (sloc < n-1) {
 405        sum = net_checksum_add(n-css, data+css);
 406        cpu_to_be16wu((uint16_t *)(data + sloc),
 407                      net_checksum_finish(sum));
 408    }
 409}
 410
 411static inline int
 412vlan_enabled(E1000State *s)
 413{
 414    return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
 415}
 416
 417static inline int
 418vlan_rx_filter_enabled(E1000State *s)
 419{
 420    return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
 421}
 422
 423static inline int
 424is_vlan_packet(E1000State *s, const uint8_t *buf)
 425{
 426    return (be16_to_cpup((uint16_t *)(buf + 12)) ==
 427                le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
 428}
 429
 430static inline int
 431is_vlan_txd(uint32_t txd_lower)
 432{
 433    return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
 434}
 435
 436/* FCS aka Ethernet CRC-32. We don't get it from backends and can't
 437 * fill it in, just pad descriptor length by 4 bytes unless guest
 438 * told us to strip it off the packet. */
 439static inline int
 440fcs_len(E1000State *s)
 441{
 442    return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
 443}
 444
 445static void
 446e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
 447{
 448    if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
 449        s->nic->nc.info->receive(&s->nic->nc, buf, size);
 450    } else {
 451        qemu_send_packet(&s->nic->nc, buf, size);
 452    }
 453}
 454
 455static void
 456xmit_seg(E1000State *s)
 457{
 458    uint16_t len, *sp;
 459    unsigned int frames = s->tx.tso_frames, css, sofar, n;
 460    struct e1000_tx *tp = &s->tx;
 461
 462    if (tp->tse && tp->cptse) {
 463        css = tp->ipcss;
 464        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
 465               frames, tp->size, css);
 466        if (tp->ip) {           // IPv4
 467            cpu_to_be16wu((uint16_t *)(tp->data+css+2),
 468                          tp->size - css);
 469            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
 470                          be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
 471        } else                  // IPv6
 472            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
 473                          tp->size - css);
 474        css = tp->tucss;
 475        len = tp->size - css;
 476        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
 477        if (tp->tcp) {
 478            sofar = frames * tp->mss;
 479            cpu_to_be32wu((uint32_t *)(tp->data+css+4), // seq
 480                be32_to_cpupu((uint32_t *)(tp->data+css+4))+sofar);
 481            if (tp->paylen - sofar > tp->mss)
 482                tp->data[css + 13] &= ~9;               // PSH, FIN
 483        } else  // UDP
 484            cpu_to_be16wu((uint16_t *)(tp->data+css+4), len);
 485        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
 486            unsigned int phsum;
 487            // add pseudo-header length before checksum calculation
 488            sp = (uint16_t *)(tp->data + tp->tucso);
 489            phsum = be16_to_cpup(sp) + len;
 490            phsum = (phsum >> 16) + (phsum & 0xffff);
 491            cpu_to_be16wu(sp, phsum);
 492        }
 493        tp->tso_frames++;
 494    }
 495
 496    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
 497        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
 498    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
 499        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
 500    if (tp->vlan_needed) {
 501        memmove(tp->vlan, tp->data, 4);
 502        memmove(tp->data, tp->data + 4, 8);
 503        memcpy(tp->data + 8, tp->vlan_header, 4);
 504        e1000_send_packet(s, tp->vlan, tp->size + 4);
 505    } else
 506        e1000_send_packet(s, tp->data, tp->size);
 507    s->mac_reg[TPT]++;
 508    s->mac_reg[GPTC]++;
 509    n = s->mac_reg[TOTL];
 510    if ((s->mac_reg[TOTL] += s->tx.size) < n)
 511        s->mac_reg[TOTH]++;
 512}
 513
 514static void
 515process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
 516{
 517    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
 518    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
 519    unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
 520    unsigned int msh = 0xfffff, hdr = 0;
 521    uint64_t addr;
 522    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
 523    struct e1000_tx *tp = &s->tx;
 524
 525    if (dtype == E1000_TXD_CMD_DEXT) {  // context descriptor
 526        op = le32_to_cpu(xp->cmd_and_length);
 527        tp->ipcss = xp->lower_setup.ip_fields.ipcss;
 528        tp->ipcso = xp->lower_setup.ip_fields.ipcso;
 529        tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
 530        tp->tucss = xp->upper_setup.tcp_fields.tucss;
 531        tp->tucso = xp->upper_setup.tcp_fields.tucso;
 532        tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
 533        tp->paylen = op & 0xfffff;
 534        tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
 535        tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
 536        tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
 537        tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
 538        tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
 539        tp->tso_frames = 0;
 540        if (tp->tucso == 0) {   // this is probably wrong
 541            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
 542            tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
 543        }
 544        return;
 545    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
 546        // data descriptor
 547        if (tp->size == 0) {
 548            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
 549        }
 550        tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
 551    } else {
 552        // legacy descriptor
 553        tp->cptse = 0;
 554    }
 555
 556    if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
 557        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
 558        tp->vlan_needed = 1;
 559        cpu_to_be16wu((uint16_t *)(tp->vlan_header),
 560                      le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
 561        cpu_to_be16wu((uint16_t *)(tp->vlan_header + 2),
 562                      le16_to_cpu(dp->upper.fields.special));
 563    }
 564        
 565    addr = le64_to_cpu(dp->buffer_addr);
 566    if (tp->tse && tp->cptse) {
 567        hdr = tp->hdr_len;
 568        msh = hdr + tp->mss;
 569        do {
 570            bytes = split_size;
 571            if (tp->size + bytes > msh)
 572                bytes = msh - tp->size;
 573
 574            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
 575            pci_dma_read(&s->dev, addr, tp->data + tp->size, bytes);
 576            if ((sz = tp->size + bytes) >= hdr && tp->size < hdr)
 577                memmove(tp->header, tp->data, hdr);
 578            tp->size = sz;
 579            addr += bytes;
 580            if (sz == msh) {
 581                xmit_seg(s);
 582                memmove(tp->data, tp->header, hdr);
 583                tp->size = hdr;
 584            }
 585        } while (split_size -= bytes);
 586    } else if (!tp->tse && tp->cptse) {
 587        // context descriptor TSE is not set, while data descriptor TSE is set
 588        DBGOUT(TXERR, "TCP segmentation error\n");
 589    } else {
 590        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
 591        pci_dma_read(&s->dev, addr, tp->data + tp->size, split_size);
 592        tp->size += split_size;
 593    }
 594
 595    if (!(txd_lower & E1000_TXD_CMD_EOP))
 596        return;
 597    if (!(tp->tse && tp->cptse && tp->size < hdr))
 598        xmit_seg(s);
 599    tp->tso_frames = 0;
 600    tp->sum_needed = 0;
 601    tp->vlan_needed = 0;
 602    tp->size = 0;
 603    tp->cptse = 0;
 604}
 605
 606static uint32_t
 607txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
 608{
 609    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
 610
 611    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
 612        return 0;
 613    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
 614                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
 615    dp->upper.data = cpu_to_le32(txd_upper);
 616    pci_dma_write(&s->dev, base + ((char *)&dp->upper - (char *)dp),
 617                  &dp->upper, sizeof(dp->upper));
 618    return E1000_ICR_TXDW;
 619}
 620
 621static uint64_t tx_desc_base(E1000State *s)
 622{
 623    uint64_t bah = s->mac_reg[TDBAH];
 624    uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
 625
 626    return (bah << 32) + bal;
 627}
 628
 629static void
 630start_xmit(E1000State *s)
 631{
 632    dma_addr_t base;
 633    struct e1000_tx_desc desc;
 634    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
 635
 636    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
 637        DBGOUT(TX, "tx disabled\n");
 638        return;
 639    }
 640
 641    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
 642        base = tx_desc_base(s) +
 643               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
 644        pci_dma_read(&s->dev, base, &desc, sizeof(desc));
 645
 646        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
 647               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
 648               desc.upper.data);
 649
 650        process_tx_desc(s, &desc);
 651        cause |= txdesc_writeback(s, base, &desc);
 652
 653        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
 654            s->mac_reg[TDH] = 0;
 655        /*
 656         * the following could happen only if guest sw assigns
 657         * bogus values to TDT/TDLEN.
 658         * there's nothing too intelligent we could do about this.
 659         */
 660        if (s->mac_reg[TDH] == tdh_start) {
 661            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
 662                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
 663            break;
 664        }
 665    }
 666    set_ics(s, 0, cause);
 667}
 668
 669static int
 670receive_filter(E1000State *s, const uint8_t *buf, int size)
 671{
 672    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 673    static const int mta_shift[] = {4, 3, 2, 0};
 674    uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
 675
 676    if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
 677        uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
 678        uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
 679                                     ((vid >> 5) & 0x7f));
 680        if ((vfta & (1 << (vid & 0x1f))) == 0)
 681            return 0;
 682    }
 683
 684    if (rctl & E1000_RCTL_UPE)                  // promiscuous
 685        return 1;
 686
 687    if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))        // promiscuous mcast
 688        return 1;
 689
 690    if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
 691        return 1;
 692
 693    for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
 694        if (!(rp[1] & E1000_RAH_AV))
 695            continue;
 696        ra[0] = cpu_to_le32(rp[0]);
 697        ra[1] = cpu_to_le32(rp[1]);
 698        if (!memcmp(buf, (uint8_t *)ra, 6)) {
 699            DBGOUT(RXFILTER,
 700                   "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
 701                   (int)(rp - s->mac_reg - RA)/2,
 702                   buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
 703            return 1;
 704        }
 705    }
 706    DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
 707           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
 708
 709    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
 710    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
 711    if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
 712        return 1;
 713    DBGOUT(RXFILTER,
 714           "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
 715           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
 716           (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
 717           s->mac_reg[MTA + (f >> 5)]);
 718
 719    return 0;
 720}
 721
 722static void
 723e1000_set_link_status(NetClientState *nc)
 724{
 725    E1000State *s = DO_UPCAST(NICState, nc, nc)->opaque;
 726    uint32_t old_status = s->mac_reg[STATUS];
 727
 728    if (nc->link_down) {
 729        e1000_link_down(s);
 730    } else {
 731        e1000_link_up(s);
 732    }
 733
 734    if (s->mac_reg[STATUS] != old_status)
 735        set_ics(s, 0, E1000_ICR_LSC);
 736}
 737
 738static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
 739{
 740    int bufs;
 741    /* Fast-path short packets */
 742    if (total_size <= s->rxbuf_size) {
 743        return s->mac_reg[RDH] != s->mac_reg[RDT] || !s->check_rxov;
 744    }
 745    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
 746        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
 747    } else if (s->mac_reg[RDH] > s->mac_reg[RDT] || !s->check_rxov) {
 748        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
 749            s->mac_reg[RDT] - s->mac_reg[RDH];
 750    } else {
 751        return false;
 752    }
 753    return total_size <= bufs * s->rxbuf_size;
 754}
 755
 756static int
 757e1000_can_receive(NetClientState *nc)
 758{
 759    E1000State *s = DO_UPCAST(NICState, nc, nc)->opaque;
 760
 761    return (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1);
 762}
 763
 764static uint64_t rx_desc_base(E1000State *s)
 765{
 766    uint64_t bah = s->mac_reg[RDBAH];
 767    uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
 768
 769    return (bah << 32) + bal;
 770}
 771
 772static ssize_t
 773e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 774{
 775    E1000State *s = DO_UPCAST(NICState, nc, nc)->opaque;
 776    struct e1000_rx_desc desc;
 777    dma_addr_t base;
 778    unsigned int n, rdt;
 779    uint32_t rdh_start;
 780    uint16_t vlan_special = 0;
 781    uint8_t vlan_status = 0, vlan_offset = 0;
 782    uint8_t min_buf[MIN_BUF_SIZE];
 783    size_t desc_offset;
 784    size_t desc_size;
 785    size_t total_size;
 786
 787    if (!(s->mac_reg[RCTL] & E1000_RCTL_EN))
 788        return -1;
 789
 790    /* Pad to minimum Ethernet frame length */
 791    if (size < sizeof(min_buf)) {
 792        memcpy(min_buf, buf, size);
 793        memset(&min_buf[size], 0, sizeof(min_buf) - size);
 794        buf = min_buf;
 795        size = sizeof(min_buf);
 796    }
 797
 798    if (!receive_filter(s, buf, size))
 799        return size;
 800
 801    if (vlan_enabled(s) && is_vlan_packet(s, buf)) {
 802        vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(buf + 14)));
 803        memmove((uint8_t *)buf + 4, buf, 12);
 804        vlan_status = E1000_RXD_STAT_VP;
 805        vlan_offset = 4;
 806        size -= 4;
 807    }
 808
 809    rdh_start = s->mac_reg[RDH];
 810    desc_offset = 0;
 811    total_size = size + fcs_len(s);
 812    if (!e1000_has_rxbufs(s, total_size)) {
 813            set_ics(s, 0, E1000_ICS_RXO);
 814            return -1;
 815    }
 816    do {
 817        desc_size = total_size - desc_offset;
 818        if (desc_size > s->rxbuf_size) {
 819            desc_size = s->rxbuf_size;
 820        }
 821        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
 822        pci_dma_read(&s->dev, base, &desc, sizeof(desc));
 823        desc.special = vlan_special;
 824        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
 825        if (desc.buffer_addr) {
 826            if (desc_offset < size) {
 827                size_t copy_size = size - desc_offset;
 828                if (copy_size > s->rxbuf_size) {
 829                    copy_size = s->rxbuf_size;
 830                }
 831                pci_dma_write(&s->dev, le64_to_cpu(desc.buffer_addr),
 832                              buf + desc_offset + vlan_offset, copy_size);
 833            }
 834            desc_offset += desc_size;
 835            desc.length = cpu_to_le16(desc_size);
 836            if (desc_offset >= total_size) {
 837                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
 838            } else {
 839                /* Guest zeroing out status is not a hardware requirement.
 840                   Clear EOP in case guest didn't do it. */
 841                desc.status &= ~E1000_RXD_STAT_EOP;
 842            }
 843        } else { // as per intel docs; skip descriptors with null buf addr
 844            DBGOUT(RX, "Null RX descriptor!!\n");
 845        }
 846        pci_dma_write(&s->dev, base, &desc, sizeof(desc));
 847
 848        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
 849            s->mac_reg[RDH] = 0;
 850        s->check_rxov = 1;
 851        /* see comment in start_xmit; same here */
 852        if (s->mac_reg[RDH] == rdh_start) {
 853            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
 854                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
 855            set_ics(s, 0, E1000_ICS_RXO);
 856            return -1;
 857        }
 858    } while (desc_offset < total_size);
 859
 860    s->mac_reg[GPRC]++;
 861    s->mac_reg[TPR]++;
 862    /* TOR - Total Octets Received:
 863     * This register includes bytes received in a packet from the <Destination
 864     * Address> field through the <CRC> field, inclusively.
 865     */
 866    n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
 867    if (n < s->mac_reg[TORL])
 868        s->mac_reg[TORH]++;
 869    s->mac_reg[TORL] = n;
 870
 871    n = E1000_ICS_RXT0;
 872    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
 873        rdt += s->mac_reg[RDLEN] / sizeof(desc);
 874    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
 875        s->rxbuf_min_shift)
 876        n |= E1000_ICS_RXDMT0;
 877
 878    set_ics(s, 0, n);
 879
 880    return size;
 881}
 882
 883static uint32_t
 884mac_readreg(E1000State *s, int index)
 885{
 886    return s->mac_reg[index];
 887}
 888
 889static uint32_t
 890mac_icr_read(E1000State *s, int index)
 891{
 892    uint32_t ret = s->mac_reg[ICR];
 893
 894    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
 895    set_interrupt_cause(s, 0, 0);
 896    return ret;
 897}
 898
 899static uint32_t
 900mac_read_clr4(E1000State *s, int index)
 901{
 902    uint32_t ret = s->mac_reg[index];
 903
 904    s->mac_reg[index] = 0;
 905    return ret;
 906}
 907
 908static uint32_t
 909mac_read_clr8(E1000State *s, int index)
 910{
 911    uint32_t ret = s->mac_reg[index];
 912
 913    s->mac_reg[index] = 0;
 914    s->mac_reg[index-1] = 0;
 915    return ret;
 916}
 917
 918static void
 919mac_writereg(E1000State *s, int index, uint32_t val)
 920{
 921    s->mac_reg[index] = val;
 922}
 923
 924static void
 925set_rdt(E1000State *s, int index, uint32_t val)
 926{
 927    s->check_rxov = 0;
 928    s->mac_reg[index] = val & 0xffff;
 929}
 930
 931static void
 932set_16bit(E1000State *s, int index, uint32_t val)
 933{
 934    s->mac_reg[index] = val & 0xffff;
 935}
 936
 937static void
 938set_dlen(E1000State *s, int index, uint32_t val)
 939{
 940    s->mac_reg[index] = val & 0xfff80;
 941}
 942
 943static void
 944set_tctl(E1000State *s, int index, uint32_t val)
 945{
 946    s->mac_reg[index] = val;
 947    s->mac_reg[TDT] &= 0xffff;
 948    start_xmit(s);
 949}
 950
 951static void
 952set_icr(E1000State *s, int index, uint32_t val)
 953{
 954    DBGOUT(INTERRUPT, "set_icr %x\n", val);
 955    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
 956}
 957
 958static void
 959set_imc(E1000State *s, int index, uint32_t val)
 960{
 961    s->mac_reg[IMS] &= ~val;
 962    set_ics(s, 0, 0);
 963}
 964
 965static void
 966set_ims(E1000State *s, int index, uint32_t val)
 967{
 968    s->mac_reg[IMS] |= val;
 969    set_ics(s, 0, 0);
 970}
 971
 972#define getreg(x)       [x] = mac_readreg
 973static uint32_t (*macreg_readops[])(E1000State *, int) = {
 974    getreg(PBA),        getreg(RCTL),   getreg(TDH),    getreg(TXDCTL),
 975    getreg(WUFC),       getreg(TDT),    getreg(CTRL),   getreg(LEDCTL),
 976    getreg(MANC),       getreg(MDIC),   getreg(SWSM),   getreg(STATUS),
 977    getreg(TORL),       getreg(TOTL),   getreg(IMS),    getreg(TCTL),
 978    getreg(RDH),        getreg(RDT),    getreg(VET),    getreg(ICS),
 979    getreg(TDBAL),      getreg(TDBAH),  getreg(RDBAH),  getreg(RDBAL),
 980    getreg(TDLEN),      getreg(RDLEN),
 981
 982    [TOTH] = mac_read_clr8,     [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4,
 983    [GPTC] = mac_read_clr4,     [TPR] = mac_read_clr4,  [TPT] = mac_read_clr4,
 984    [ICR] = mac_icr_read,       [EECD] = get_eecd,      [EERD] = flash_eerd_read,
 985    [CRCERRS ... MPC] = &mac_readreg,
 986    [RA ... RA+31] = &mac_readreg,
 987    [MTA ... MTA+127] = &mac_readreg,
 988    [VFTA ... VFTA+127] = &mac_readreg,
 989};
 990enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
 991
 992#define putreg(x)       [x] = mac_writereg
 993static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
 994    putreg(PBA),        putreg(EERD),   putreg(SWSM),   putreg(WUFC),
 995    putreg(TDBAL),      putreg(TDBAH),  putreg(TXDCTL), putreg(RDBAH),
 996    putreg(RDBAL),      putreg(LEDCTL), putreg(VET),
 997    [TDLEN] = set_dlen, [RDLEN] = set_dlen,     [TCTL] = set_tctl,
 998    [TDT] = set_tctl,   [MDIC] = set_mdic,      [ICS] = set_ics,
 999    [TDH] = set_16bit,  [RDH] = set_16bit,      [RDT] = set_rdt,
1000    [IMC] = set_imc,    [IMS] = set_ims,        [ICR] = set_icr,
1001    [EECD] = set_eecd,  [RCTL] = set_rx_control, [CTRL] = set_ctrl,
1002    [RA ... RA+31] = &mac_writereg,
1003    [MTA ... MTA+127] = &mac_writereg,
1004    [VFTA ... VFTA+127] = &mac_writereg,
1005};
1006
1007enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1008
1009static void
1010e1000_mmio_write(void *opaque, target_phys_addr_t addr, uint64_t val,
1011                 unsigned size)
1012{
1013    E1000State *s = opaque;
1014    unsigned int index = (addr & 0x1ffff) >> 2;
1015
1016    if (index < NWRITEOPS && macreg_writeops[index]) {
1017        macreg_writeops[index](s, index, val);
1018    } else if (index < NREADOPS && macreg_readops[index]) {
1019        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
1020    } else {
1021        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1022               index<<2, val);
1023    }
1024}
1025
1026static uint64_t
1027e1000_mmio_read(void *opaque, target_phys_addr_t addr, unsigned size)
1028{
1029    E1000State *s = opaque;
1030    unsigned int index = (addr & 0x1ffff) >> 2;
1031
1032    if (index < NREADOPS && macreg_readops[index])
1033    {
1034        return macreg_readops[index](s, index);
1035    }
1036    DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1037    return 0;
1038}
1039
1040static const MemoryRegionOps e1000_mmio_ops = {
1041    .read = e1000_mmio_read,
1042    .write = e1000_mmio_write,
1043    .endianness = DEVICE_LITTLE_ENDIAN,
1044    .impl = {
1045        .min_access_size = 4,
1046        .max_access_size = 4,
1047    },
1048};
1049
1050static uint64_t e1000_io_read(void *opaque, target_phys_addr_t addr,
1051                              unsigned size)
1052{
1053    E1000State *s = opaque;
1054
1055    (void)s;
1056    return 0;
1057}
1058
1059static void e1000_io_write(void *opaque, target_phys_addr_t addr,
1060                           uint64_t val, unsigned size)
1061{
1062    E1000State *s = opaque;
1063
1064    (void)s;
1065}
1066
1067static const MemoryRegionOps e1000_io_ops = {
1068    .read = e1000_io_read,
1069    .write = e1000_io_write,
1070    .endianness = DEVICE_LITTLE_ENDIAN,
1071};
1072
/*
 * VMState field test: true only when the incoming stream is version 1.
 * The opaque argument is not used.
 */
static bool is_version_1(void *opaque, int version_id)
{
    if (version_id != 1) {
        return false;
    }
    return true;
}
1077
1078static const VMStateDescription vmstate_e1000 = {
1079    .name = "e1000",
1080    .version_id = 2,
1081    .minimum_version_id = 1,
1082    .minimum_version_id_old = 1,
1083    .fields      = (VMStateField []) {
1084        VMSTATE_PCI_DEVICE(dev, E1000State),
1085        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1086        VMSTATE_UNUSED(4), /* Was mmio_base.  */
1087        VMSTATE_UINT32(rxbuf_size, E1000State),
1088        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1089        VMSTATE_UINT32(eecd_state.val_in, E1000State),
1090        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1091        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1092        VMSTATE_UINT16(eecd_state.reading, E1000State),
1093        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1094        VMSTATE_UINT8(tx.ipcss, E1000State),
1095        VMSTATE_UINT8(tx.ipcso, E1000State),
1096        VMSTATE_UINT16(tx.ipcse, E1000State),
1097        VMSTATE_UINT8(tx.tucss, E1000State),
1098        VMSTATE_UINT8(tx.tucso, E1000State),
1099        VMSTATE_UINT16(tx.tucse, E1000State),
1100        VMSTATE_UINT32(tx.paylen, E1000State),
1101        VMSTATE_UINT8(tx.hdr_len, E1000State),
1102        VMSTATE_UINT16(tx.mss, E1000State),
1103        VMSTATE_UINT16(tx.size, E1000State),
1104        VMSTATE_UINT16(tx.tso_frames, E1000State),
1105        VMSTATE_UINT8(tx.sum_needed, E1000State),
1106        VMSTATE_INT8(tx.ip, E1000State),
1107        VMSTATE_INT8(tx.tcp, E1000State),
1108        VMSTATE_BUFFER(tx.header, E1000State),
1109        VMSTATE_BUFFER(tx.data, E1000State),
1110        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1111        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1112        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1113        VMSTATE_UINT32(mac_reg[EECD], E1000State),
1114        VMSTATE_UINT32(mac_reg[EERD], E1000State),
1115        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1116        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1117        VMSTATE_UINT32(mac_reg[ICR], E1000State),
1118        VMSTATE_UINT32(mac_reg[ICS], E1000State),
1119        VMSTATE_UINT32(mac_reg[IMC], E1000State),
1120        VMSTATE_UINT32(mac_reg[IMS], E1000State),
1121        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1122        VMSTATE_UINT32(mac_reg[MANC], E1000State),
1123        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1124        VMSTATE_UINT32(mac_reg[MPC], E1000State),
1125        VMSTATE_UINT32(mac_reg[PBA], E1000State),
1126        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1127        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1128        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1129        VMSTATE_UINT32(mac_reg[RDH], E1000State),
1130        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1131        VMSTATE_UINT32(mac_reg[RDT], E1000State),
1132        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1133        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1134        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1135        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1136        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1137        VMSTATE_UINT32(mac_reg[TDH], E1000State),
1138        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1139        VMSTATE_UINT32(mac_reg[TDT], E1000State),
1140        VMSTATE_UINT32(mac_reg[TORH], E1000State),
1141        VMSTATE_UINT32(mac_reg[TORL], E1000State),
1142        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1143        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1144        VMSTATE_UINT32(mac_reg[TPR], E1000State),
1145        VMSTATE_UINT32(mac_reg[TPT], E1000State),
1146        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1147        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1148        VMSTATE_UINT32(mac_reg[VET], E1000State),
1149        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1150        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1151        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1152        VMSTATE_END_OF_LIST()
1153    }
1154};
1155
1156static const uint16_t e1000_eeprom_template[64] = {
1157    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1158    0x3000, 0x1000, 0x6403, E1000_DEVID, 0x8086, E1000_DEVID, 0x8086, 0x3040,
1159    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1160    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1161    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1162    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1163    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1164    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1165};
1166
1167/* PCI interface */
1168
1169static void
1170e1000_mmio_setup(E1000State *d)
1171{
1172    int i;
1173    const uint32_t excluded_regs[] = {
1174        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1175        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1176    };
1177
1178    memory_region_init_io(&d->mmio, &e1000_mmio_ops, d, "e1000-mmio",
1179                          PNPMMIO_SIZE);
1180    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1181    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1182        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1183                                     excluded_regs[i+1] - excluded_regs[i] - 4);
1184    memory_region_init_io(&d->io, &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1185}
1186
1187static void
1188e1000_cleanup(NetClientState *nc)
1189{
1190    E1000State *s = DO_UPCAST(NICState, nc, nc)->opaque;
1191
1192    s->nic = NULL;
1193}
1194
1195static void
1196pci_e1000_uninit(PCIDevice *dev)
1197{
1198    E1000State *d = DO_UPCAST(E1000State, dev, dev);
1199
1200    qemu_del_timer(d->autoneg_timer);
1201    qemu_free_timer(d->autoneg_timer);
1202    memory_region_destroy(&d->mmio);
1203    memory_region_destroy(&d->io);
1204    qemu_del_net_client(&d->nic->nc);
1205}
1206
1207static NetClientInfo net_e1000_info = {
1208    .type = NET_CLIENT_OPTIONS_KIND_NIC,
1209    .size = sizeof(NICState),
1210    .can_receive = e1000_can_receive,
1211    .receive = e1000_receive,
1212    .cleanup = e1000_cleanup,
1213    .link_status_changed = e1000_set_link_status,
1214};
1215
1216static int pci_e1000_init(PCIDevice *pci_dev)
1217{
1218    E1000State *d = DO_UPCAST(E1000State, dev, pci_dev);
1219    uint8_t *pci_conf;
1220    uint16_t checksum = 0;
1221    int i;
1222    uint8_t *macaddr;
1223
1224    pci_conf = d->dev.config;
1225
1226    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1227    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1228
1229    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1230
1231    e1000_mmio_setup(d);
1232
1233    pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1234
1235    pci_register_bar(&d->dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1236
1237    memmove(d->eeprom_data, e1000_eeprom_template,
1238        sizeof e1000_eeprom_template);
1239    qemu_macaddr_default_if_unset(&d->conf.macaddr);
1240    macaddr = d->conf.macaddr.a;
1241    for (i = 0; i < 3; i++)
1242        d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1243    for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1244        checksum += d->eeprom_data[i];
1245    checksum = (uint16_t) EEPROM_SUM - checksum;
1246    d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1247
1248    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1249                          object_get_typename(OBJECT(d)), d->dev.qdev.id, d);
1250
1251    qemu_format_nic_info_str(&d->nic->nc, macaddr);
1252
1253    add_boot_device_path(d->conf.bootindex, &pci_dev->qdev, "/ethernet-phy@0");
1254
1255    d->autoneg_timer = qemu_new_timer_ms(vm_clock, e1000_autoneg_timer, d);
1256
1257    return 0;
1258}
1259
1260static void qdev_e1000_reset(DeviceState *dev)
1261{
1262    E1000State *d = DO_UPCAST(E1000State, dev.qdev, dev);
1263    e1000_reset(d);
1264}
1265
1266static Property e1000_properties[] = {
1267    DEFINE_NIC_PROPERTIES(E1000State, conf),
1268    DEFINE_PROP_END_OF_LIST(),
1269};
1270
1271static void e1000_class_init(ObjectClass *klass, void *data)
1272{
1273    DeviceClass *dc = DEVICE_CLASS(klass);
1274    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1275
1276    k->init = pci_e1000_init;
1277    k->exit = pci_e1000_uninit;
1278    k->romfile = "pxe-e1000.rom";
1279    k->vendor_id = PCI_VENDOR_ID_INTEL;
1280    k->device_id = E1000_DEVID;
1281    k->revision = 0x03;
1282    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1283    dc->desc = "Intel Gigabit Ethernet";
1284    dc->reset = qdev_e1000_reset;
1285    dc->vmsd = &vmstate_e1000;
1286    dc->props = e1000_properties;
1287}
1288
1289static TypeInfo e1000_info = {
1290    .name          = "e1000",
1291    .parent        = TYPE_PCI_DEVICE,
1292    .instance_size = sizeof(E1000State),
1293    .class_init    = e1000_class_init,
1294};
1295
1296static void e1000_register_types(void)
1297{
1298    type_register_static(&e1000_info);
1299}
1300
1301type_init(e1000_register_types)
1302