qemu/hw/e1000.c
<<
>>
Prefs
   1/*
   2 * QEMU e1000 emulation
   3 *
   4 * Software developer's manual:
   5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
   6 *
   7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
   8 * Copyright (c) 2008 Qumranet
   9 * Based on work done by:
  10 * Copyright (c) 2007 Dan Aloni
  11 * Copyright (c) 2004 Antony T Curtis
  12 *
  13 * This library is free software; you can redistribute it and/or
  14 * modify it under the terms of the GNU Lesser General Public
  15 * License as published by the Free Software Foundation; either
  16 * version 2 of the License, or (at your option) any later version.
  17 *
  18 * This library is distributed in the hope that it will be useful,
  19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 * Lesser General Public License for more details.
  22 *
  23 * You should have received a copy of the GNU Lesser General Public
  24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  25 */
  26
  27
  28#include "hw.h"
  29#include "pci.h"
  30#include "net.h"
  31#include "net/checksum.h"
  32#include "loader.h"
  33#include "sysemu.h"
  34
  35#include "e1000_hw.h"
  36
  /* Debug tracing is compiled in whenever E1000_DEBUG is defined. */
  #define E1000_DEBUG

  #ifdef E1000_DEBUG
  /* Trace categories; each enumerator is a bit position in debugflags. */
  enum {
      DEBUG_GENERAL,
      DEBUG_IO,
      DEBUG_MMIO,
      DEBUG_INTERRUPT,
      DEBUG_RX,
      DEBUG_TX,
      DEBUG_MDIC,
      DEBUG_EEPROM,
      DEBUG_UNKNOWN,
      DEBUG_TXSUM,
      DEBUG_TXERR,
      DEBUG_RXERR,
      DEBUG_RXFILTER,
      DEBUG_NOTYET,
  };

  /* Mask bit for trace category x (x is the name without the DEBUG_ prefix). */
  #define DBGBIT(x)       (1<<DEBUG_##x)

  /* Categories that are printed; only TXERR and GENERAL are on by default. */
  static int debugflags = DBGBIT(GENERAL) | DBGBIT(TXERR);

  /* Print a message to stderr, prefixed "e1000: ", if category `what` is on. */
  #define DBGOUT(what, fmt, ...) do { \
      if (debugflags & DBGBIT(what)) \
          fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
      } while (0)
  #else
  /* Tracing compiled out: DBGOUT expands to an empty statement. */
  #define DBGOUT(what, fmt, ...) do {} while (0)
  #endif
  56
  /* Region sizes and the minimum frame length used by this model. */
  #define IOPORT_SIZE       0x40
  #define PNPMMIO_SIZE      0x20000
  /* Minimum number of octets in an Ethernet frame, not counting the FCS. */
  #define MIN_BUF_SIZE      60
  60
  61/*
  62 * HW models:
  63 *  E1000_DEV_ID_82540EM works with Windows and Linux
  64 *  E1000_DEV_ID_82573L OK with windoze and Linux 2.6.22,
  65 *      appears to perform better than 82540EM, but breaks with Linux 2.6.18
  66 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
  67 *  Others never tested
  68 */
  69enum { E1000_DEVID = E1000_DEV_ID_82540EM };
  70
  71/*
  72 * May need to specify additional MAC-to-PHY entries --
  73 * Intel's Windows driver refuses to initialize unless they match
  74 */
  75enum {
  76    PHY_ID2_INIT = E1000_DEVID == E1000_DEV_ID_82573L ?         0xcc2 :
  77                   E1000_DEVID == E1000_DEV_ID_82544GC_COPPER ? 0xc30 :
  78                   /* default to E1000_DEV_ID_82540EM */        0xc20
  79};
  80
  81typedef struct E1000State_st {
  82    PCIDevice dev;
  83    NICState *nic;
  84    NICConf conf;
  85    int mmio_index;
  86
  87    uint32_t mac_reg[0x8000];
  88    uint16_t phy_reg[0x20];
  89    uint16_t eeprom_data[64];
  90
  91    uint32_t rxbuf_size;
  92    uint32_t rxbuf_min_shift;
  93    int check_rxov;
  94    struct e1000_tx {
  95        unsigned char header[256];
  96        unsigned char vlan_header[4];
  97        /* Fields vlan and data must not be reordered or separated. */
  98        unsigned char vlan[4];
  99        unsigned char data[0x10000];
 100        uint16_t size;
 101        unsigned char sum_needed;
 102        unsigned char vlan_needed;
 103        uint8_t ipcss;
 104        uint8_t ipcso;
 105        uint16_t ipcse;
 106        uint8_t tucss;
 107        uint8_t tucso;
 108        uint16_t tucse;
 109        uint8_t hdr_len;
 110        uint16_t mss;
 111        uint32_t paylen;
 112        uint16_t tso_frames;
 113        char tse;
 114        int8_t ip;
 115        int8_t tcp;
 116        char cptse;     // current packet tse bit
 117    } tx;
 118
 119    struct {
 120        uint32_t val_in;        // shifted in from guest driver
 121        uint16_t bitnum_in;
 122        uint16_t bitnum_out;
 123        uint16_t reading;
 124        uint32_t old_eecd;
 125    } eecd_state;
 126} E1000State;
 127
 128#define defreg(x)       x = (E1000_##x>>2)
 129enum {
 130    defreg(CTRL),       defreg(EECD),   defreg(EERD),   defreg(GPRC),
 131    defreg(GPTC),       defreg(ICR),    defreg(ICS),    defreg(IMC),
 132    defreg(IMS),        defreg(LEDCTL), defreg(MANC),   defreg(MDIC),
 133    defreg(MPC),        defreg(PBA),    defreg(RCTL),   defreg(RDBAH),
 134    defreg(RDBAL),      defreg(RDH),    defreg(RDLEN),  defreg(RDT),
 135    defreg(STATUS),     defreg(SWSM),   defreg(TCTL),   defreg(TDBAH),
 136    defreg(TDBAL),      defreg(TDH),    defreg(TDLEN),  defreg(TDT),
 137    defreg(TORH),       defreg(TORL),   defreg(TOTH),   defreg(TOTL),
 138    defreg(TPR),        defreg(TPT),    defreg(TXDCTL), defreg(WUFC),
 139    defreg(RA),         defreg(MTA),    defreg(CRCERRS),defreg(VFTA),
 140    defreg(VET),
 141};
 142
 143enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
 144static const char phy_regcap[0x20] = {
 145    [PHY_STATUS] = PHY_R,       [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
 146    [PHY_ID1] = PHY_R,          [M88E1000_PHY_SPEC_CTRL] = PHY_RW,
 147    [PHY_CTRL] = PHY_RW,        [PHY_1000T_CTRL] = PHY_RW,
 148    [PHY_LP_ABILITY] = PHY_R,   [PHY_1000T_STATUS] = PHY_R,
 149    [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR] = PHY_R,
 150    [PHY_ID2] = PHY_R,          [M88E1000_PHY_SPEC_STATUS] = PHY_R
 151};
 152
 153static void
 154ioport_map(PCIDevice *pci_dev, int region_num, pcibus_t addr,
 155           pcibus_t size, int type)
 156{
 157    DBGOUT(IO, "e1000_ioport_map addr=0x%04"FMT_PCIBUS
 158           " size=0x%08"FMT_PCIBUS"\n", addr, size);
 159}
 160
 161static void
 162set_interrupt_cause(E1000State *s, int index, uint32_t val)
 163{
 164    if (val)
 165        val |= E1000_ICR_INT_ASSERTED;
 166    s->mac_reg[ICR] = val;
 167    s->mac_reg[ICS] = val;
 168    qemu_set_irq(s->dev.irq[0], (s->mac_reg[IMS] & s->mac_reg[ICR]) != 0);
 169}
 170
 171static void
 172set_ics(E1000State *s, int index, uint32_t val)
 173{
 174    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
 175        s->mac_reg[IMS]);
 176    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
 177}
 178
 179static int
 180rxbufsize(uint32_t v)
 181{
 182    v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
 183         E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
 184         E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
 185    switch (v) {
 186    case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
 187        return 16384;
 188    case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
 189        return 8192;
 190    case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
 191        return 4096;
 192    case E1000_RCTL_SZ_1024:
 193        return 1024;
 194    case E1000_RCTL_SZ_512:
 195        return 512;
 196    case E1000_RCTL_SZ_256:
 197        return 256;
 198    }
 199    return 2048;
 200}
 201
 202static void
 203set_ctrl(E1000State *s, int index, uint32_t val)
 204{
 205    /* RST is self clearing */
 206    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
 207}
 208
 209static void
 210set_rx_control(E1000State *s, int index, uint32_t val)
 211{
 212    s->mac_reg[RCTL] = val;
 213    s->rxbuf_size = rxbufsize(val);
 214    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
 215    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
 216           s->mac_reg[RCTL]);
 217}
 218
 219static void
 220set_mdic(E1000State *s, int index, uint32_t val)
 221{
 222    uint32_t data = val & E1000_MDIC_DATA_MASK;
 223    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
 224
 225    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
 226        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
 227    else if (val & E1000_MDIC_OP_READ) {
 228        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
 229        if (!(phy_regcap[addr] & PHY_R)) {
 230            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
 231            val |= E1000_MDIC_ERROR;
 232        } else
 233            val = (val ^ data) | s->phy_reg[addr];
 234    } else if (val & E1000_MDIC_OP_WRITE) {
 235        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
 236        if (!(phy_regcap[addr] & PHY_W)) {
 237            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
 238            val |= E1000_MDIC_ERROR;
 239        } else
 240            s->phy_reg[addr] = data;
 241    }
 242    s->mac_reg[MDIC] = val | E1000_MDIC_READY;
 243    set_ics(s, 0, E1000_ICR_MDAC);
 244}
 245
 246static uint32_t
 247get_eecd(E1000State *s, int index)
 248{
 249    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
 250
 251    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
 252           s->eecd_state.bitnum_out, s->eecd_state.reading);
 253    if (!s->eecd_state.reading ||
 254        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
 255          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
 256        ret |= E1000_EECD_DO;
 257    return ret;
 258}
 259
 260static void
 261set_eecd(E1000State *s, int index, uint32_t val)
 262{
 263    uint32_t oldval = s->eecd_state.old_eecd;
 264
 265    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
 266            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
 267    if (!(E1000_EECD_CS & val))                 // CS inactive; nothing to do
 268        return;
 269    if (E1000_EECD_CS & (val ^ oldval)) {       // CS rise edge; reset state
 270        s->eecd_state.val_in = 0;
 271        s->eecd_state.bitnum_in = 0;
 272        s->eecd_state.bitnum_out = 0;
 273        s->eecd_state.reading = 0;
 274    }
 275    if (!(E1000_EECD_SK & (val ^ oldval)))      // no clock edge
 276        return;
 277    if (!(E1000_EECD_SK & val)) {               // falling edge
 278        s->eecd_state.bitnum_out++;
 279        return;
 280    }
 281    s->eecd_state.val_in <<= 1;
 282    if (val & E1000_EECD_DI)
 283        s->eecd_state.val_in |= 1;
 284    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
 285        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
 286        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
 287            EEPROM_READ_OPCODE_MICROWIRE);
 288    }
 289    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
 290           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
 291           s->eecd_state.reading);
 292}
 293
 294static uint32_t
 295flash_eerd_read(E1000State *s, int x)
 296{
 297    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
 298
 299    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
 300        return (s->mac_reg[EERD]);
 301
 302    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
 303        return (E1000_EEPROM_RW_REG_DONE | r);
 304
 305    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
 306           E1000_EEPROM_RW_REG_DONE | r);
 307}
 308
 /*
  * Compute an Internet checksum over data[css .. n-1] and store it,
  * big-endian, at data[sloc].
  *
  * data: packet buffer            n:   number of valid bytes in data
  * sloc: offset of the checksum   css: offset where summing starts
  * cse:  offset of the last byte to sum; 0 means up to the end
  *
  * Nothing is written unless sloc < n - 1 (after applying cse).
  * NOTE(review): n, css and cse are not validated against each other here;
  * n == 0 makes n - 1 wrap.  Callers pass guest-supplied offsets.
  */
 static void
 putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
 {
     uint32_t sum;

     if (cse && cse < n) {
         n = cse + 1;
     }
     if (sloc >= n - 1) {
         return;
     }
     sum = net_checksum_add(n - css, data + css);
     cpu_to_be16wu((uint16_t *)(data + sloc), net_checksum_finish(sum));
 }
 322
 323static inline int
 324vlan_enabled(E1000State *s)
 325{
 326    return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
 327}
 328
 329static inline int
 330vlan_rx_filter_enabled(E1000State *s)
 331{
 332    return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
 333}
 334
 335static inline int
 336is_vlan_packet(E1000State *s, const uint8_t *buf)
 337{
 338    return (be16_to_cpup((uint16_t *)(buf + 12)) ==
 339                le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
 340}
 341
 342static inline int
 343is_vlan_txd(uint32_t txd_lower)
 344{
 345    return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
 346}
 347
 348/* FCS aka Ethernet CRC-32. We don't get it from backends and can't
 349 * fill it in, just pad descriptor length by 4 bytes unless guest
 350 * told us to strip it off the packet. */
 351static inline int
 352fcs_len(E1000State *s)
 353{
 354    return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
 355}
 356
 357static void
 358xmit_seg(E1000State *s)
 359{
 360    uint16_t len, *sp;
 361    unsigned int frames = s->tx.tso_frames, css, sofar, n;
 362    struct e1000_tx *tp = &s->tx;
 363
 364    if (tp->tse && tp->cptse) {
 365        css = tp->ipcss;
 366        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
 367               frames, tp->size, css);
 368        if (tp->ip) {           // IPv4
 369            cpu_to_be16wu((uint16_t *)(tp->data+css+2),
 370                          tp->size - css);
 371            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
 372                          be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
 373        } else                  // IPv6
 374            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
 375                          tp->size - css);
 376        css = tp->tucss;
 377        len = tp->size - css;
 378        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
 379        if (tp->tcp) {
 380            sofar = frames * tp->mss;
 381            cpu_to_be32wu((uint32_t *)(tp->data+css+4), // seq
 382                be32_to_cpupu((uint32_t *)(tp->data+css+4))+sofar);
 383            if (tp->paylen - sofar > tp->mss)
 384                tp->data[css + 13] &= ~9;               // PSH, FIN
 385        } else  // UDP
 386            cpu_to_be16wu((uint16_t *)(tp->data+css+4), len);
 387        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
 388            unsigned int phsum;
 389            // add pseudo-header length before checksum calculation
 390            sp = (uint16_t *)(tp->data + tp->tucso);
 391            phsum = be16_to_cpup(sp) + len;
 392            phsum = (phsum >> 16) + (phsum & 0xffff);
 393            cpu_to_be16wu(sp, phsum);
 394        }
 395        tp->tso_frames++;
 396    }
 397
 398    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
 399        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
 400    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
 401        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
 402    if (tp->vlan_needed) {
 403        memmove(tp->vlan, tp->data, 4);
 404        memmove(tp->data, tp->data + 4, 8);
 405        memcpy(tp->data + 8, tp->vlan_header, 4);
 406        qemu_send_packet(&s->nic->nc, tp->vlan, tp->size + 4);
 407    } else
 408        qemu_send_packet(&s->nic->nc, tp->data, tp->size);
 409    s->mac_reg[TPT]++;
 410    s->mac_reg[GPTC]++;
 411    n = s->mac_reg[TOTL];
 412    if ((s->mac_reg[TOTL] += s->tx.size) < n)
 413        s->mac_reg[TOTH]++;
 414}
 415
 416static void
 417process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
 418{
 419    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
 420    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
 421    unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
 422    unsigned int msh = 0xfffff, hdr = 0;
 423    uint64_t addr;
 424    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
 425    struct e1000_tx *tp = &s->tx;
 426
 427    if (dtype == E1000_TXD_CMD_DEXT) {  // context descriptor
 428        op = le32_to_cpu(xp->cmd_and_length);
 429        tp->ipcss = xp->lower_setup.ip_fields.ipcss;
 430        tp->ipcso = xp->lower_setup.ip_fields.ipcso;
 431        tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
 432        tp->tucss = xp->upper_setup.tcp_fields.tucss;
 433        tp->tucso = xp->upper_setup.tcp_fields.tucso;
 434        tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
 435        tp->paylen = op & 0xfffff;
 436        tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
 437        tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
 438        tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
 439        tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
 440        tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
 441        tp->tso_frames = 0;
 442        if (tp->tucso == 0) {   // this is probably wrong
 443            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
 444            tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
 445        }
 446        return;
 447    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
 448        // data descriptor
 449        if (tp->size == 0) {
 450            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
 451        }
 452        tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
 453    } else {
 454        // legacy descriptor
 455        tp->cptse = 0;
 456    }
 457
 458    if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
 459        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
 460        tp->vlan_needed = 1;
 461        cpu_to_be16wu((uint16_t *)(tp->vlan_header),
 462                      le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
 463        cpu_to_be16wu((uint16_t *)(tp->vlan_header + 2),
 464                      le16_to_cpu(dp->upper.fields.special));
 465    }
 466        
 467    addr = le64_to_cpu(dp->buffer_addr);
 468    if (tp->tse && tp->cptse) {
 469        hdr = tp->hdr_len;
 470        msh = hdr + tp->mss;
 471        do {
 472            bytes = split_size;
 473            if (tp->size + bytes > msh)
 474                bytes = msh - tp->size;
 475            cpu_physical_memory_read(addr, tp->data + tp->size, bytes);
 476            if ((sz = tp->size + bytes) >= hdr && tp->size < hdr)
 477                memmove(tp->header, tp->data, hdr);
 478            tp->size = sz;
 479            addr += bytes;
 480            if (sz == msh) {
 481                xmit_seg(s);
 482                memmove(tp->data, tp->header, hdr);
 483                tp->size = hdr;
 484            }
 485        } while (split_size -= bytes);
 486    } else if (!tp->tse && tp->cptse) {
 487        // context descriptor TSE is not set, while data descriptor TSE is set
 488        DBGOUT(TXERR, "TCP segmentaion Error\n");
 489    } else {
 490        cpu_physical_memory_read(addr, tp->data + tp->size, split_size);
 491        tp->size += split_size;
 492    }
 493
 494    if (!(txd_lower & E1000_TXD_CMD_EOP))
 495        return;
 496    if (!(tp->tse && tp->cptse && tp->size < hdr))
 497        xmit_seg(s);
 498    tp->tso_frames = 0;
 499    tp->sum_needed = 0;
 500    tp->vlan_needed = 0;
 501    tp->size = 0;
 502    tp->cptse = 0;
 503}
 504
 505static uint32_t
 506txdesc_writeback(target_phys_addr_t base, struct e1000_tx_desc *dp)
 507{
 508    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
 509
 510    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
 511        return 0;
 512    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
 513                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
 514    dp->upper.data = cpu_to_le32(txd_upper);
 515    cpu_physical_memory_write(base + ((char *)&dp->upper - (char *)dp),
 516                              (void *)&dp->upper, sizeof(dp->upper));
 517    return E1000_ICR_TXDW;
 518}
 519
 520static uint64_t tx_desc_base(E1000State *s)
 521{
 522    uint64_t bah = s->mac_reg[TDBAH];
 523    uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
 524
 525    return (bah << 32) + bal;
 526}
 527
 528static void
 529start_xmit(E1000State *s)
 530{
 531    target_phys_addr_t base;
 532    struct e1000_tx_desc desc;
 533    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
 534
 535    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
 536        DBGOUT(TX, "tx disabled\n");
 537        return;
 538    }
 539
 540    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
 541        base = tx_desc_base(s) +
 542               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
 543        cpu_physical_memory_read(base, (void *)&desc, sizeof(desc));
 544
 545        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
 546               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
 547               desc.upper.data);
 548
 549        process_tx_desc(s, &desc);
 550        cause |= txdesc_writeback(base, &desc);
 551
 552        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
 553            s->mac_reg[TDH] = 0;
 554        /*
 555         * the following could happen only if guest sw assigns
 556         * bogus values to TDT/TDLEN.
 557         * there's nothing too intelligent we could do about this.
 558         */
 559        if (s->mac_reg[TDH] == tdh_start) {
 560            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
 561                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
 562            break;
 563        }
 564    }
 565    set_ics(s, 0, cause);
 566}
 567
 568static int
 569receive_filter(E1000State *s, const uint8_t *buf, int size)
 570{
 571    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 572    static const int mta_shift[] = {4, 3, 2, 0};
 573    uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
 574
 575    if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
 576        uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
 577        uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
 578                                     ((vid >> 5) & 0x7f));
 579        if ((vfta & (1 << (vid & 0x1f))) == 0)
 580            return 0;
 581    }
 582
 583    if (rctl & E1000_RCTL_UPE)                  // promiscuous
 584        return 1;
 585
 586    if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))        // promiscuous mcast
 587        return 1;
 588
 589    if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
 590        return 1;
 591
 592    for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
 593        if (!(rp[1] & E1000_RAH_AV))
 594            continue;
 595        ra[0] = cpu_to_le32(rp[0]);
 596        ra[1] = cpu_to_le32(rp[1]);
 597        if (!memcmp(buf, (uint8_t *)ra, 6)) {
 598            DBGOUT(RXFILTER,
 599                   "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
 600                   (int)(rp - s->mac_reg - RA)/2,
 601                   buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
 602            return 1;
 603        }
 604    }
 605    DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
 606           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
 607
 608    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
 609    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
 610    if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
 611        return 1;
 612    DBGOUT(RXFILTER,
 613           "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
 614           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
 615           (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
 616           s->mac_reg[MTA + (f >> 5)]);
 617
 618    return 0;
 619}
 620
 621static void
 622e1000_set_link_status(VLANClientState *nc)
 623{
 624    E1000State *s = DO_UPCAST(NICState, nc, nc)->opaque;
 625    uint32_t old_status = s->mac_reg[STATUS];
 626
 627    if (nc->link_down)
 628        s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
 629    else
 630        s->mac_reg[STATUS] |= E1000_STATUS_LU;
 631
 632    if (s->mac_reg[STATUS] != old_status)
 633        set_ics(s, 0, E1000_ICR_LSC);
 634}
 635
 636static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
 637{
 638    int bufs;
 639    /* Fast-path short packets */
 640    if (total_size <= s->rxbuf_size) {
 641        return s->mac_reg[RDH] != s->mac_reg[RDT] || !s->check_rxov;
 642    }
 643    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
 644        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
 645    } else if (s->mac_reg[RDH] > s->mac_reg[RDT] || !s->check_rxov) {
 646        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
 647            s->mac_reg[RDT] - s->mac_reg[RDH];
 648    } else {
 649        return false;
 650    }
 651    return total_size <= bufs * s->rxbuf_size;
 652}
 653
 654static int
 655e1000_can_receive(VLANClientState *nc)
 656{
 657    E1000State *s = DO_UPCAST(NICState, nc, nc)->opaque;
 658
 659    return (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1);
 660}
 661
 662static uint64_t rx_desc_base(E1000State *s)
 663{
 664    uint64_t bah = s->mac_reg[RDBAH];
 665    uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
 666
 667    return (bah << 32) + bal;
 668}
 669
 670static ssize_t
 671e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
 672{
 673    E1000State *s = DO_UPCAST(NICState, nc, nc)->opaque;
 674    struct e1000_rx_desc desc;
 675    target_phys_addr_t base;
 676    unsigned int n, rdt;
 677    uint32_t rdh_start;
 678    uint16_t vlan_special = 0;
 679    uint8_t vlan_status = 0, vlan_offset = 0;
 680    uint8_t min_buf[MIN_BUF_SIZE];
 681    size_t desc_offset;
 682    size_t desc_size;
 683    size_t total_size;
 684
 685    if (!(s->mac_reg[RCTL] & E1000_RCTL_EN))
 686        return -1;
 687
 688    /* Pad to minimum Ethernet frame length */
 689    if (size < sizeof(min_buf)) {
 690        memcpy(min_buf, buf, size);
 691        memset(&min_buf[size], 0, sizeof(min_buf) - size);
 692        buf = min_buf;
 693        size = sizeof(min_buf);
 694    }
 695
 696    if (!receive_filter(s, buf, size))
 697        return size;
 698
 699    if (vlan_enabled(s) && is_vlan_packet(s, buf)) {
 700        vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(buf + 14)));
 701        memmove((uint8_t *)buf + 4, buf, 12);
 702        vlan_status = E1000_RXD_STAT_VP;
 703        vlan_offset = 4;
 704        size -= 4;
 705    }
 706
 707    rdh_start = s->mac_reg[RDH];
 708    desc_offset = 0;
 709    total_size = size + fcs_len(s);
 710    if (!e1000_has_rxbufs(s, total_size)) {
 711            set_ics(s, 0, E1000_ICS_RXO);
 712            return -1;
 713    }
 714    do {
 715        desc_size = total_size - desc_offset;
 716        if (desc_size > s->rxbuf_size) {
 717            desc_size = s->rxbuf_size;
 718        }
 719        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
 720        cpu_physical_memory_read(base, (void *)&desc, sizeof(desc));
 721        desc.special = vlan_special;
 722        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
 723        if (desc.buffer_addr) {
 724            if (desc_offset < size) {
 725                size_t copy_size = size - desc_offset;
 726                if (copy_size > s->rxbuf_size) {
 727                    copy_size = s->rxbuf_size;
 728                }
 729                cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
 730                                          (void *)(buf + desc_offset + vlan_offset),
 731                                          copy_size);
 732            }
 733            desc_offset += desc_size;
 734            desc.length = cpu_to_le16(desc_size);
 735            if (desc_offset >= total_size) {
 736                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
 737            } else {
 738                /* Guest zeroing out status is not a hardware requirement.
 739                   Clear EOP in case guest didn't do it. */
 740                desc.status &= ~E1000_RXD_STAT_EOP;
 741            }
 742        } else { // as per intel docs; skip descriptors with null buf addr
 743            DBGOUT(RX, "Null RX descriptor!!\n");
 744        }
 745        cpu_physical_memory_write(base, (void *)&desc, sizeof(desc));
 746
 747        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
 748            s->mac_reg[RDH] = 0;
 749        s->check_rxov = 1;
 750        /* see comment in start_xmit; same here */
 751        if (s->mac_reg[RDH] == rdh_start) {
 752            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
 753                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
 754            set_ics(s, 0, E1000_ICS_RXO);
 755            return -1;
 756        }
 757    } while (desc_offset < total_size);
 758
 759    s->mac_reg[GPRC]++;
 760    s->mac_reg[TPR]++;
 761    /* TOR - Total Octets Received:
 762     * This register includes bytes received in a packet from the <Destination
 763     * Address> field through the <CRC> field, inclusively.
 764     */
 765    n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
 766    if (n < s->mac_reg[TORL])
 767        s->mac_reg[TORH]++;
 768    s->mac_reg[TORL] = n;
 769
 770    n = E1000_ICS_RXT0;
 771    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
 772        rdt += s->mac_reg[RDLEN] / sizeof(desc);
 773    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
 774        s->rxbuf_min_shift)
 775        n |= E1000_ICS_RXDMT0;
 776
 777    set_ics(s, 0, n);
 778
 779    return size;
 780}
 781
 782static uint32_t
 783mac_readreg(E1000State *s, int index)
 784{
 785    return s->mac_reg[index];
 786}
 787
 788static uint32_t
 789mac_icr_read(E1000State *s, int index)
 790{
 791    uint32_t ret = s->mac_reg[ICR];
 792
 793    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
 794    set_interrupt_cause(s, 0, 0);
 795    return ret;
 796}
 797
 798static uint32_t
 799mac_read_clr4(E1000State *s, int index)
 800{
 801    uint32_t ret = s->mac_reg[index];
 802
 803    s->mac_reg[index] = 0;
 804    return ret;
 805}
 806
 807static uint32_t
 808mac_read_clr8(E1000State *s, int index)
 809{
 810    uint32_t ret = s->mac_reg[index];
 811
 812    s->mac_reg[index] = 0;
 813    s->mac_reg[index-1] = 0;
 814    return ret;
 815}
 816
 817static void
 818mac_writereg(E1000State *s, int index, uint32_t val)
 819{
 820    s->mac_reg[index] = val;
 821}
 822
 823static void
 824set_rdt(E1000State *s, int index, uint32_t val)
 825{
 826    s->check_rxov = 0;
 827    s->mac_reg[index] = val & 0xffff;
 828}
 829
 830static void
 831set_16bit(E1000State *s, int index, uint32_t val)
 832{
 833    s->mac_reg[index] = val & 0xffff;
 834}
 835
 836static void
 837set_dlen(E1000State *s, int index, uint32_t val)
 838{
 839    s->mac_reg[index] = val & 0xfff80;
 840}
 841
 842static void
 843set_tctl(E1000State *s, int index, uint32_t val)
 844{
 845    s->mac_reg[index] = val;
 846    s->mac_reg[TDT] &= 0xffff;
 847    start_xmit(s);
 848}
 849
 850static void
 851set_icr(E1000State *s, int index, uint32_t val)
 852{
 853    DBGOUT(INTERRUPT, "set_icr %x\n", val);
 854    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
 855}
 856
 857static void
 858set_imc(E1000State *s, int index, uint32_t val)
 859{
 860    s->mac_reg[IMS] &= ~val;
 861    set_ics(s, 0, 0);
 862}
 863
 864static void
 865set_ims(E1000State *s, int index, uint32_t val)
 866{
 867    s->mac_reg[IMS] |= val;
 868    set_ics(s, 0, 0);
 869}
 870
 871#define getreg(x)       [x] = mac_readreg
 872static uint32_t (*macreg_readops[])(E1000State *, int) = {
 873    getreg(PBA),        getreg(RCTL),   getreg(TDH),    getreg(TXDCTL),
 874    getreg(WUFC),       getreg(TDT),    getreg(CTRL),   getreg(LEDCTL),
 875    getreg(MANC),       getreg(MDIC),   getreg(SWSM),   getreg(STATUS),
 876    getreg(TORL),       getreg(TOTL),   getreg(IMS),    getreg(TCTL),
 877    getreg(RDH),        getreg(RDT),    getreg(VET),    getreg(ICS),
 878    getreg(TDBAL),      getreg(TDBAH),  getreg(RDBAH),  getreg(RDBAL),
 879    getreg(TDLEN),      getreg(RDLEN),
 880
 881    [TOTH] = mac_read_clr8,     [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4,
 882    [GPTC] = mac_read_clr4,     [TPR] = mac_read_clr4,  [TPT] = mac_read_clr4,
 883    [ICR] = mac_icr_read,       [EECD] = get_eecd,      [EERD] = flash_eerd_read,
 884    [CRCERRS ... MPC] = &mac_readreg,
 885    [RA ... RA+31] = &mac_readreg,
 886    [MTA ... MTA+127] = &mac_readreg,
 887    [VFTA ... VFTA+127] = &mac_readreg,
 888};
 889enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
 890
 891#define putreg(x)       [x] = mac_writereg
 892static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
 893    putreg(PBA),        putreg(EERD),   putreg(SWSM),   putreg(WUFC),
 894    putreg(TDBAL),      putreg(TDBAH),  putreg(TXDCTL), putreg(RDBAH),
 895    putreg(RDBAL),      putreg(LEDCTL), putreg(VET),
 896    [TDLEN] = set_dlen, [RDLEN] = set_dlen,     [TCTL] = set_tctl,
 897    [TDT] = set_tctl,   [MDIC] = set_mdic,      [ICS] = set_ics,
 898    [TDH] = set_16bit,  [RDH] = set_16bit,      [RDT] = set_rdt,
 899    [IMC] = set_imc,    [IMS] = set_ims,        [ICR] = set_icr,
 900    [EECD] = set_eecd,  [RCTL] = set_rx_control, [CTRL] = set_ctrl,
 901    [RA ... RA+31] = &mac_writereg,
 902    [MTA ... MTA+127] = &mac_writereg,
 903    [VFTA ... VFTA+127] = &mac_writereg,
 904};
 905enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
 906
 907static void
 908e1000_mmio_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
 909{
 910    E1000State *s = opaque;
 911    unsigned int index = (addr & 0x1ffff) >> 2;
 912
 913    if (index < NWRITEOPS && macreg_writeops[index]) {
 914        macreg_writeops[index](s, index, val);
 915    } else if (index < NREADOPS && macreg_readops[index]) {
 916        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04x\n", index<<2, val);
 917    } else {
 918        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08x\n",
 919               index<<2, val);
 920    }
 921}
 922
 923static void
 924e1000_mmio_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
 925{
 926    // emulate hw without byte enables: no RMW
 927    e1000_mmio_writel(opaque, addr & ~3,
 928                      (val & 0xffff) << (8*(addr & 3)));
 929}
 930
 931static void
 932e1000_mmio_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
 933{
 934    // emulate hw without byte enables: no RMW
 935    e1000_mmio_writel(opaque, addr & ~3,
 936                      (val & 0xff) << (8*(addr & 3)));
 937}
 938
 939static uint32_t
 940e1000_mmio_readl(void *opaque, target_phys_addr_t addr)
 941{
 942    E1000State *s = opaque;
 943    unsigned int index = (addr & 0x1ffff) >> 2;
 944
 945    if (index < NREADOPS && macreg_readops[index])
 946    {
 947        return macreg_readops[index](s, index);
 948    }
 949    DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
 950    return 0;
 951}
 952
 953static uint32_t
 954e1000_mmio_readb(void *opaque, target_phys_addr_t addr)
 955{
 956    return ((e1000_mmio_readl(opaque, addr & ~3)) >>
 957            (8 * (addr & 3))) & 0xff;
 958}
 959
 960static uint32_t
 961e1000_mmio_readw(void *opaque, target_phys_addr_t addr)
 962{
 963    return ((e1000_mmio_readl(opaque, addr & ~3)) >>
 964            (8 * (addr & 3))) & 0xffff;
 965}
 966
 967static bool is_version_1(void *opaque, int version_id)
 968{
 969    return version_id == 1;
 970}
 971
 972static const VMStateDescription vmstate_e1000 = {
 973    .name = "e1000",
 974    .version_id = 2,
 975    .minimum_version_id = 1,
 976    .minimum_version_id_old = 1,
 977    .fields      = (VMStateField []) {
 978        VMSTATE_PCI_DEVICE(dev, E1000State),
 979        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
 980        VMSTATE_UNUSED(4), /* Was mmio_base.  */
 981        VMSTATE_UINT32(rxbuf_size, E1000State),
 982        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
 983        VMSTATE_UINT32(eecd_state.val_in, E1000State),
 984        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
 985        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
 986        VMSTATE_UINT16(eecd_state.reading, E1000State),
 987        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
 988        VMSTATE_UINT8(tx.ipcss, E1000State),
 989        VMSTATE_UINT8(tx.ipcso, E1000State),
 990        VMSTATE_UINT16(tx.ipcse, E1000State),
 991        VMSTATE_UINT8(tx.tucss, E1000State),
 992        VMSTATE_UINT8(tx.tucso, E1000State),
 993        VMSTATE_UINT16(tx.tucse, E1000State),
 994        VMSTATE_UINT32(tx.paylen, E1000State),
 995        VMSTATE_UINT8(tx.hdr_len, E1000State),
 996        VMSTATE_UINT16(tx.mss, E1000State),
 997        VMSTATE_UINT16(tx.size, E1000State),
 998        VMSTATE_UINT16(tx.tso_frames, E1000State),
 999        VMSTATE_UINT8(tx.sum_needed, E1000State),
1000        VMSTATE_INT8(tx.ip, E1000State),
1001        VMSTATE_INT8(tx.tcp, E1000State),
1002        VMSTATE_BUFFER(tx.header, E1000State),
1003        VMSTATE_BUFFER(tx.data, E1000State),
1004        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1005        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1006        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1007        VMSTATE_UINT32(mac_reg[EECD], E1000State),
1008        VMSTATE_UINT32(mac_reg[EERD], E1000State),
1009        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1010        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1011        VMSTATE_UINT32(mac_reg[ICR], E1000State),
1012        VMSTATE_UINT32(mac_reg[ICS], E1000State),
1013        VMSTATE_UINT32(mac_reg[IMC], E1000State),
1014        VMSTATE_UINT32(mac_reg[IMS], E1000State),
1015        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1016        VMSTATE_UINT32(mac_reg[MANC], E1000State),
1017        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1018        VMSTATE_UINT32(mac_reg[MPC], E1000State),
1019        VMSTATE_UINT32(mac_reg[PBA], E1000State),
1020        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1021        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1022        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1023        VMSTATE_UINT32(mac_reg[RDH], E1000State),
1024        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1025        VMSTATE_UINT32(mac_reg[RDT], E1000State),
1026        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1027        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1028        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1029        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1030        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1031        VMSTATE_UINT32(mac_reg[TDH], E1000State),
1032        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1033        VMSTATE_UINT32(mac_reg[TDT], E1000State),
1034        VMSTATE_UINT32(mac_reg[TORH], E1000State),
1035        VMSTATE_UINT32(mac_reg[TORL], E1000State),
1036        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1037        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1038        VMSTATE_UINT32(mac_reg[TPR], E1000State),
1039        VMSTATE_UINT32(mac_reg[TPT], E1000State),
1040        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1041        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1042        VMSTATE_UINT32(mac_reg[VET], E1000State),
1043        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1044        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1045        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1046        VMSTATE_END_OF_LIST()
1047    }
1048};
1049
1050static const uint16_t e1000_eeprom_template[64] = {
1051    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
1052    0x3000, 0x1000, 0x6403, E1000_DEVID, 0x8086, E1000_DEVID, 0x8086, 0x3040,
1053    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
1054    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
1055    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
1056    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1057    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
1058    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
1059};
1060
1061static const uint16_t phy_reg_init[] = {
1062    [PHY_CTRL] = 0x1140,                        [PHY_STATUS] = 0x796d, // link initially up
1063    [PHY_ID1] = 0x141,                          [PHY_ID2] = PHY_ID2_INIT,
1064    [PHY_1000T_CTRL] = 0x0e00,                  [M88E1000_PHY_SPEC_CTRL] = 0x360,
1065    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,      [PHY_AUTONEG_ADV] = 0xde1,
1066    [PHY_LP_ABILITY] = 0x1e0,                   [PHY_1000T_STATUS] = 0x3c00,
1067    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
1068};
1069
1070static const uint32_t mac_reg_init[] = {
1071    [PBA] =     0x00100030,
1072    [LEDCTL] =  0x602,
1073    [CTRL] =    E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
1074                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
1075    [STATUS] =  0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
1076                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
1077                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
1078                E1000_STATUS_LU,
1079    [MANC] =    E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
1080                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
1081                E1000_MANC_RMCP_EN,
1082};
1083
1084/* PCI interface */
1085
1086static CPUWriteMemoryFunc * const e1000_mmio_write[] = {
1087    e1000_mmio_writeb,  e1000_mmio_writew,      e1000_mmio_writel
1088};
1089
1090static CPUReadMemoryFunc * const e1000_mmio_read[] = {
1091    e1000_mmio_readb,   e1000_mmio_readw,       e1000_mmio_readl
1092};
1093
1094static void
1095e1000_mmio_map(PCIDevice *pci_dev, int region_num,
1096                pcibus_t addr, pcibus_t size, int type)
1097{
1098    E1000State *d = DO_UPCAST(E1000State, dev, pci_dev);
1099    int i;
1100    const uint32_t excluded_regs[] = {
1101        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1102        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1103    };
1104
1105
1106    DBGOUT(MMIO, "e1000_mmio_map addr=0x%08"FMT_PCIBUS" 0x%08"FMT_PCIBUS"\n",
1107           addr, size);
1108
1109    cpu_register_physical_memory(addr, PNPMMIO_SIZE, d->mmio_index);
1110    qemu_register_coalesced_mmio(addr, excluded_regs[0]);
1111
1112    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1113        qemu_register_coalesced_mmio(addr + excluded_regs[i] + 4,
1114                                     excluded_regs[i + 1] -
1115                                     excluded_regs[i] - 4);
1116}
1117
1118static void
1119e1000_cleanup(VLANClientState *nc)
1120{
1121    E1000State *s = DO_UPCAST(NICState, nc, nc)->opaque;
1122
1123    s->nic = NULL;
1124}
1125
1126static int
1127pci_e1000_uninit(PCIDevice *dev)
1128{
1129    E1000State *d = DO_UPCAST(E1000State, dev, dev);
1130
1131    cpu_unregister_io_memory(d->mmio_index);
1132    qemu_del_vlan_client(&d->nic->nc);
1133    return 0;
1134}
1135
1136static void e1000_reset(void *opaque)
1137{
1138    E1000State *d = opaque;
1139
1140    memset(d->phy_reg, 0, sizeof d->phy_reg);
1141    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
1142    memset(d->mac_reg, 0, sizeof d->mac_reg);
1143    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
1144    d->rxbuf_min_shift = 1;
1145    memset(&d->tx, 0, sizeof d->tx);
1146}
1147
1148static NetClientInfo net_e1000_info = {
1149    .type = NET_CLIENT_TYPE_NIC,
1150    .size = sizeof(NICState),
1151    .can_receive = e1000_can_receive,
1152    .receive = e1000_receive,
1153    .cleanup = e1000_cleanup,
1154    .link_status_changed = e1000_set_link_status,
1155};
1156
1157static int pci_e1000_init(PCIDevice *pci_dev)
1158{
1159    E1000State *d = DO_UPCAST(E1000State, dev, pci_dev);
1160    uint8_t *pci_conf;
1161    uint16_t checksum = 0;
1162    int i;
1163    uint8_t *macaddr;
1164
1165    pci_conf = d->dev.config;
1166
1167    /* TODO: we have no capabilities, so why is this bit set? */
1168    pci_set_word(pci_conf + PCI_STATUS, PCI_STATUS_CAP_LIST);
1169    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1170    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1171
1172    /* TODO: RST# value should be 0 if programmable, PCI spec 6.2.4 */
1173    pci_conf[PCI_INTERRUPT_PIN] = 1; // interrupt pin 0
1174
1175    d->mmio_index = cpu_register_io_memory(e1000_mmio_read,
1176            e1000_mmio_write, d, DEVICE_LITTLE_ENDIAN);
1177
1178    pci_register_bar(&d->dev, 0, PNPMMIO_SIZE,
1179                           PCI_BASE_ADDRESS_SPACE_MEMORY, e1000_mmio_map);
1180
1181    pci_register_bar(&d->dev, 1, IOPORT_SIZE,
1182                           PCI_BASE_ADDRESS_SPACE_IO, ioport_map);
1183
1184    memmove(d->eeprom_data, e1000_eeprom_template,
1185        sizeof e1000_eeprom_template);
1186    qemu_macaddr_default_if_unset(&d->conf.macaddr);
1187    macaddr = d->conf.macaddr.a;
1188    for (i = 0; i < 3; i++)
1189        d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1190    for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1191        checksum += d->eeprom_data[i];
1192    checksum = (uint16_t) EEPROM_SUM - checksum;
1193    d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1194
1195    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1196                          d->dev.qdev.info->name, d->dev.qdev.id, d);
1197
1198    qemu_format_nic_info_str(&d->nic->nc, macaddr);
1199
1200    add_boot_device_path(d->conf.bootindex, &pci_dev->qdev, "/ethernet-phy@0");
1201
1202    return 0;
1203}
1204
1205static void qdev_e1000_reset(DeviceState *dev)
1206{
1207    E1000State *d = DO_UPCAST(E1000State, dev.qdev, dev);
1208    e1000_reset(d);
1209}
1210
1211static PCIDeviceInfo e1000_info = {
1212    .qdev.name  = "e1000",
1213    .qdev.desc  = "Intel Gigabit Ethernet",
1214    .qdev.size  = sizeof(E1000State),
1215    .qdev.reset = qdev_e1000_reset,
1216    .qdev.vmsd  = &vmstate_e1000,
1217    .init       = pci_e1000_init,
1218    .exit       = pci_e1000_uninit,
1219    .romfile    = "pxe-e1000.rom",
1220    .vendor_id  = PCI_VENDOR_ID_INTEL,
1221    .device_id  = E1000_DEVID,
1222    .revision   = 0x03,
1223    .class_id   = PCI_CLASS_NETWORK_ETHERNET,
1224    .qdev.props = (Property[]) {
1225        DEFINE_NIC_PROPERTIES(E1000State, conf),
1226        DEFINE_PROP_END_OF_LIST(),
1227    }
1228};
1229
1230static void e1000_register_devices(void)
1231{
1232    pci_qdev_register(&e1000_info);
1233}
1234
1235device_init(e1000_register_devices)
1236