qemu/hw/net/rocker/rocker.c
<<
>>
Prefs
   1/*
   2 * QEMU rocker switch emulation - PCI device
   3 *
   4 * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
   5 * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License as published by
   9 * the Free Software Foundation; either version 2 of the License, or
  10 * (at your option) any later version.
  11 *
  12 * This program is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 * GNU General Public License for more details.
  16 */
  17
  18#include "qemu/osdep.h"
  19#include "hw/pci/pci_device.h"
  20#include "hw/qdev-properties.h"
  21#include "hw/qdev-properties-system.h"
  22#include "migration/vmstate.h"
  23#include "hw/pci/msix.h"
  24#include "net/net.h"
  25#include "net/eth.h"
  26#include "qapi/error.h"
  27#include "qapi/qapi-commands-rocker.h"
  28#include "qemu/iov.h"
  29#include "qemu/module.h"
  30#include "qemu/bitops.h"
  31#include "qemu/log.h"
  32
  33#include "rocker.h"
  34#include "rocker_hw.h"
  35#include "rocker_fp.h"
  36#include "rocker_desc.h"
  37#include "rocker_tlv.h"
  38#include "rocker_world.h"
  39#include "rocker_of_dpa.h"
  40
  41struct rocker {
  42    /* private */
  43    PCIDevice parent_obj;
  44    /* public */
  45
  46    MemoryRegion mmio;
  47    MemoryRegion msix_bar;
  48
  49    /* switch configuration */
  50    char *name;                  /* switch name */
  51    char *world_name;            /* world name */
  52    uint32_t fp_ports;           /* front-panel port count */
  53    NICPeers *fp_ports_peers;
  54    MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
  55    uint64_t switch_id;          /* switch id */
  56
  57    /* front-panel ports */
  58    FpPort *fp_port[ROCKER_FP_PORTS_MAX];
  59
  60    /* register backings */
  61    uint32_t test_reg;
  62    uint64_t test_reg64;
  63    dma_addr_t test_dma_addr;
  64    uint32_t test_dma_size;
  65    uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */
  66
  67    /* desc rings */
  68    DescRing **rings;
  69
  70    /* switch worlds */
  71    World *worlds[ROCKER_WORLD_TYPE_MAX];
  72    World *world_dflt;
  73
  74    QLIST_ENTRY(rocker) next;
  75};
  76
  77static QLIST_HEAD(, rocker) rockers;
  78
  79Rocker *rocker_find(const char *name)
  80{
  81    Rocker *r;
  82
  83    QLIST_FOREACH(r, &rockers, next)
  84        if (strcmp(r->name, name) == 0) {
  85            return r;
  86        }
  87
  88    return NULL;
  89}
  90
  91World *rocker_get_world(Rocker *r, enum rocker_world_type type)
  92{
  93    if (type < ROCKER_WORLD_TYPE_MAX) {
  94        return r->worlds[type];
  95    }
  96    return NULL;
  97}
  98
  99RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
 100{
 101    RockerSwitch *rocker;
 102    Rocker *r;
 103
 104    r = rocker_find(name);
 105    if (!r) {
 106        error_setg(errp, "rocker %s not found", name);
 107        return NULL;
 108    }
 109
 110    rocker = g_new0(RockerSwitch, 1);
 111    rocker->name = g_strdup(r->name);
 112    rocker->id = r->switch_id;
 113    rocker->ports = r->fp_ports;
 114
 115    return rocker;
 116}
 117
 118RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
 119{
 120    RockerPortList *list = NULL;
 121    Rocker *r;
 122    int i;
 123
 124    r = rocker_find(name);
 125    if (!r) {
 126        error_setg(errp, "rocker %s not found", name);
 127        return NULL;
 128    }
 129
 130    for (i = r->fp_ports - 1; i >= 0; i--) {
 131        QAPI_LIST_PREPEND(list, fp_port_get_info(r->fp_port[i]));
 132    }
 133
 134    return list;
 135}
 136
 137uint32_t rocker_fp_ports(Rocker *r)
 138{
 139    return r->fp_ports;
 140}
 141
 142static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
 143                                            DescRing *ring)
 144{
 145    return (desc_ring_index(ring) - 2) / 2 + 1;
 146}
 147
 148static int tx_consume(Rocker *r, DescInfo *info)
 149{
 150    PCIDevice *dev = PCI_DEVICE(r);
 151    char *buf = desc_get_buf(info, true);
 152    RockerTlv *tlv_frag;
 153    RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
 154    struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
 155    uint32_t pport;
 156    uint32_t port;
 157    uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
 158    uint16_t tx_l3_csum_off = 0;
 159    uint16_t tx_tso_mss = 0;
 160    uint16_t tx_tso_hdr_len = 0;
 161    int iovcnt = 0;
 162    int err = ROCKER_OK;
 163    int rem;
 164    int i;
 165
 166    if (!buf) {
 167        return -ROCKER_ENXIO;
 168    }
 169
 170    rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
 171
 172    if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
 173        return -ROCKER_EINVAL;
 174    }
 175
 176    pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
 177    if (!fp_port_from_pport(pport, &port)) {
 178        return -ROCKER_EINVAL;
 179    }
 180
 181    if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
 182        tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
 183    }
 184
 185    switch (tx_offload) {
 186    case ROCKER_TX_OFFLOAD_L3_CSUM:
 187        if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
 188            return -ROCKER_EINVAL;
 189        }
 190        break;
 191    case ROCKER_TX_OFFLOAD_TSO:
 192        if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
 193            !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
 194            return -ROCKER_EINVAL;
 195        }
 196        break;
 197    }
 198
 199    if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
 200        tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
 201        qemu_log_mask(LOG_UNIMP, "rocker %s: L3 not implemented"
 202                                 " (cksum off: %u)\n",
 203                      __func__, tx_l3_csum_off);
 204    }
 205
 206    if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
 207        tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
 208        qemu_log_mask(LOG_UNIMP, "rocker %s: TSO not implemented (MSS: %u)\n",
 209                      __func__, tx_tso_mss);
 210    }
 211
 212    if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
 213        tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
 214        qemu_log_mask(LOG_UNIMP, "rocker %s: TSO not implemented"
 215                                 " (hdr length: %u)\n",
 216                      __func__, tx_tso_hdr_len);
 217    }
 218
 219    rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
 220        hwaddr frag_addr;
 221        uint16_t frag_len;
 222
 223        if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
 224            err = -ROCKER_EINVAL;
 225            goto err_bad_attr;
 226        }
 227
 228        rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
 229
 230        if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
 231            !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
 232            err = -ROCKER_EINVAL;
 233            goto err_bad_attr;
 234        }
 235
 236        frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
 237        frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
 238
 239        if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
 240            goto err_too_many_frags;
 241        }
 242        iov[iovcnt].iov_len = frag_len;
 243        iov[iovcnt].iov_base = g_malloc(frag_len);
 244
 245        pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
 246                     iov[iovcnt].iov_len);
 247
 248        iovcnt++;
 249    }
 250
 251    err = fp_port_eg(r->fp_port[port], iov, iovcnt);
 252
 253err_too_many_frags:
 254err_bad_attr:
 255    for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
 256        g_free(iov[i].iov_base);
 257    }
 258
 259    return err;
 260}
 261
 262static int cmd_get_port_settings(Rocker *r,
 263                                 DescInfo *info, char *buf,
 264                                 RockerTlv *cmd_info_tlv)
 265{
 266    RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
 267    RockerTlv *nest;
 268    FpPort *fp_port;
 269    uint32_t pport;
 270    uint32_t port;
 271    uint32_t speed;
 272    uint8_t duplex;
 273    uint8_t autoneg;
 274    uint8_t learning;
 275    char *phys_name;
 276    MACAddr macaddr;
 277    enum rocker_world_type mode;
 278    size_t tlv_size;
 279    int pos;
 280    int err;
 281
 282    rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
 283                            cmd_info_tlv);
 284
 285    if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
 286        return -ROCKER_EINVAL;
 287    }
 288
 289    pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
 290    if (!fp_port_from_pport(pport, &port)) {
 291        return -ROCKER_EINVAL;
 292    }
 293    fp_port = r->fp_port[port];
 294
 295    err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
 296    if (err) {
 297        return err;
 298    }
 299
 300    fp_port_get_macaddr(fp_port, &macaddr);
 301    mode = world_type(fp_port_get_world(fp_port));
 302    learning = fp_port_get_learning(fp_port);
 303    phys_name = fp_port_get_name(fp_port);
 304
 305    tlv_size = rocker_tlv_total_size(0) +                 /* nest */
 306               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 307               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
 308               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
 309               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
 310               rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
 311               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
 312               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
 313               rocker_tlv_total_size(strlen(phys_name));
 314
 315    if (tlv_size > desc_buf_size(info)) {
 316        return -ROCKER_EMSGSIZE;
 317    }
 318
 319    pos = 0;
 320    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
 321    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
 322    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
 323    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
 324    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
 325    rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
 326                   sizeof(macaddr.a), macaddr.a);
 327    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
 328    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
 329                      learning);
 330    rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
 331                   strlen(phys_name), phys_name);
 332    rocker_tlv_nest_end(buf, &pos, nest);
 333
 334    return desc_set_buf(info, tlv_size);
 335}
 336
 337static int cmd_set_port_settings(Rocker *r,
 338                                 RockerTlv *cmd_info_tlv)
 339{
 340    RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
 341    FpPort *fp_port;
 342    uint32_t pport;
 343    uint32_t port;
 344    uint32_t speed;
 345    uint8_t duplex;
 346    uint8_t autoneg;
 347    uint8_t learning;
 348    MACAddr macaddr;
 349    enum rocker_world_type mode;
 350    int err;
 351
 352    rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
 353                            cmd_info_tlv);
 354
 355    if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
 356        return -ROCKER_EINVAL;
 357    }
 358
 359    pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
 360    if (!fp_port_from_pport(pport, &port)) {
 361        return -ROCKER_EINVAL;
 362    }
 363    fp_port = r->fp_port[port];
 364
 365    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
 366        tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
 367        tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
 368
 369        speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
 370        duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
 371        autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
 372
 373        err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
 374        if (err) {
 375            return err;
 376        }
 377    }
 378
 379    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
 380        if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
 381            sizeof(macaddr.a)) {
 382            return -ROCKER_EINVAL;
 383        }
 384        memcpy(macaddr.a,
 385               rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
 386               sizeof(macaddr.a));
 387        fp_port_set_macaddr(fp_port, &macaddr);
 388    }
 389
 390    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
 391        mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
 392        if (mode >= ROCKER_WORLD_TYPE_MAX) {
 393            return -ROCKER_EINVAL;
 394        }
 395        /* We don't support world change. */
 396        if (!fp_port_check_world(fp_port, r->worlds[mode])) {
 397            return -ROCKER_EINVAL;
 398        }
 399    }
 400
 401    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
 402        learning =
 403            rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
 404        fp_port_set_learning(fp_port, learning);
 405    }
 406
 407    return ROCKER_OK;
 408}
 409
 410static int cmd_consume(Rocker *r, DescInfo *info)
 411{
 412    char *buf = desc_get_buf(info, false);
 413    RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
 414    RockerTlv *info_tlv;
 415    World *world;
 416    uint16_t cmd;
 417    int err;
 418
 419    if (!buf) {
 420        return -ROCKER_ENXIO;
 421    }
 422
 423    rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
 424
 425    if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
 426        return -ROCKER_EINVAL;
 427    }
 428
 429    cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
 430    info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
 431
 432    /* This might be reworked to something like this:
 433     * Every world will have an array of command handlers from
 434     * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is
 435     * up to each world to implement whatever command it want.
 436     * It can reference "generic" commands as cmd_set_port_settings or
 437     * cmd_get_port_settings
 438     */
 439
 440    switch (cmd) {
 441    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
 442    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
 443    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
 444    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
 445    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
 446    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
 447    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
 448    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
 449        world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
 450        err = world_do_cmd(world, info, buf, cmd, info_tlv);
 451        break;
 452    case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
 453        err = cmd_get_port_settings(r, info, buf, info_tlv);
 454        break;
 455    case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
 456        err = cmd_set_port_settings(r, info_tlv);
 457        break;
 458    default:
 459        err = -ROCKER_EINVAL;
 460        break;
 461    }
 462
 463    return err;
 464}
 465
 466static void rocker_msix_irq(Rocker *r, unsigned vector)
 467{
 468    PCIDevice *dev = PCI_DEVICE(r);
 469
 470    DPRINTF("MSI-X notify request for vector %d\n", vector);
 471    if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
 472        DPRINTF("incorrect vector %d\n", vector);
 473        return;
 474    }
 475    msix_notify(dev, vector);
 476}
 477
 478int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
 479{
 480    DescRing *ring = r->rings[ROCKER_RING_EVENT];
 481    DescInfo *info = desc_ring_fetch_desc(ring);
 482    RockerTlv *nest;
 483    char *buf;
 484    size_t tlv_size;
 485    int pos;
 486    int err;
 487
 488    if (!info) {
 489        return -ROCKER_ENOBUFS;
 490    }
 491
 492    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
 493               rocker_tlv_total_size(0) +                 /* nest */
 494               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 495               rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
 496
 497    if (tlv_size > desc_buf_size(info)) {
 498        err = -ROCKER_EMSGSIZE;
 499        goto err_too_big;
 500    }
 501
 502    buf = desc_get_buf(info, false);
 503    if (!buf) {
 504        err = -ROCKER_ENOMEM;
 505        goto err_no_mem;
 506    }
 507
 508    pos = 0;
 509    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
 510                        ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
 511    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
 512    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
 513    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
 514                      link_up ? 1 : 0);
 515    rocker_tlv_nest_end(buf, &pos, nest);
 516
 517    err = desc_set_buf(info, tlv_size);
 518
 519err_too_big:
 520err_no_mem:
 521    if (desc_ring_post_desc(ring, err)) {
 522        rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
 523    }
 524
 525    return err;
 526}
 527
 528int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
 529                               uint16_t vlan_id)
 530{
 531    DescRing *ring = r->rings[ROCKER_RING_EVENT];
 532    DescInfo *info;
 533    FpPort *fp_port;
 534    uint32_t port;
 535    RockerTlv *nest;
 536    char *buf;
 537    size_t tlv_size;
 538    int pos;
 539    int err;
 540
 541    if (!fp_port_from_pport(pport, &port)) {
 542        return -ROCKER_EINVAL;
 543    }
 544    fp_port = r->fp_port[port];
 545    if (!fp_port_get_learning(fp_port)) {
 546        return ROCKER_OK;
 547    }
 548
 549    info = desc_ring_fetch_desc(ring);
 550    if (!info) {
 551        return -ROCKER_ENOBUFS;
 552    }
 553
 554    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
 555               rocker_tlv_total_size(0) +                 /* nest */
 556               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 557               rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
 558               rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
 559
 560    if (tlv_size > desc_buf_size(info)) {
 561        err = -ROCKER_EMSGSIZE;
 562        goto err_too_big;
 563    }
 564
 565    buf = desc_get_buf(info, false);
 566    if (!buf) {
 567        err = -ROCKER_ENOMEM;
 568        goto err_no_mem;
 569    }
 570
 571    pos = 0;
 572    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
 573                        ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
 574    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
 575    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
 576    rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
 577    rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
 578    rocker_tlv_nest_end(buf, &pos, nest);
 579
 580    err = desc_set_buf(info, tlv_size);
 581
 582err_too_big:
 583err_no_mem:
 584    if (desc_ring_post_desc(ring, err)) {
 585        rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
 586    }
 587
 588    return err;
 589}
 590
 591static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
 592                                                     uint32_t pport)
 593{
 594    return r->rings[(pport - 1) * 2 + 3];
 595}
 596
 597int rx_produce(World *world, uint32_t pport,
 598               const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
 599{
 600    Rocker *r = world_rocker(world);
 601    PCIDevice *dev = (PCIDevice *)r;
 602    DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
 603    DescInfo *info = desc_ring_fetch_desc(ring);
 604    char *data;
 605    size_t data_size = iov_size(iov, iovcnt);
 606    char *buf;
 607    uint16_t rx_flags = 0;
 608    uint16_t rx_csum = 0;
 609    size_t tlv_size;
 610    RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
 611    hwaddr frag_addr;
 612    uint16_t frag_max_len;
 613    int pos;
 614    int err;
 615
 616    if (!info) {
 617        return -ROCKER_ENOBUFS;
 618    }
 619
 620    buf = desc_get_buf(info, false);
 621    if (!buf) {
 622        err = -ROCKER_ENXIO;
 623        goto out;
 624    }
 625    rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
 626
 627    if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
 628        !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
 629        err = -ROCKER_EINVAL;
 630        goto out;
 631    }
 632
 633    frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
 634    frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
 635
 636    if (data_size > frag_max_len) {
 637        err = -ROCKER_EMSGSIZE;
 638        goto out;
 639    }
 640
 641    if (copy_to_cpu) {
 642        rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
 643    }
 644
 645    /* XXX calc rx flags/csum */
 646
 647    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
 648               rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */
 649               rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
 650               rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
 651               rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
 652
 653    if (tlv_size > desc_buf_size(info)) {
 654        err = -ROCKER_EMSGSIZE;
 655        goto out;
 656    }
 657
 658    /* TODO:
 659     * iov dma write can be optimized in similar way e1000 does it in
 660     * e1000_receive_iov. But maybe if would make sense to introduce
 661     * generic helper iov_dma_write.
 662     */
 663
 664    data = g_malloc(data_size);
 665
 666    iov_to_buf(iov, iovcnt, 0, data, data_size);
 667    pci_dma_write(dev, frag_addr, data, data_size);
 668    g_free(data);
 669
 670    pos = 0;
 671    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
 672    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
 673    rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
 674    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
 675    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
 676
 677    err = desc_set_buf(info, tlv_size);
 678
 679out:
 680    if (desc_ring_post_desc(ring, err)) {
 681        rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
 682    }
 683
 684    return err;
 685}
 686
 687int rocker_port_eg(Rocker *r, uint32_t pport,
 688                   const struct iovec *iov, int iovcnt)
 689{
 690    FpPort *fp_port;
 691    uint32_t port;
 692
 693    if (!fp_port_from_pport(pport, &port)) {
 694        return -ROCKER_EINVAL;
 695    }
 696
 697    fp_port = r->fp_port[port];
 698
 699    return fp_port_eg(fp_port, iov, iovcnt);
 700}
 701
 702static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
 703{
 704    PCIDevice *dev = PCI_DEVICE(r);
 705    char *buf;
 706    int i;
 707
 708    buf = g_malloc(r->test_dma_size);
 709
 710    switch (val) {
 711    case ROCKER_TEST_DMA_CTRL_CLEAR:
 712        memset(buf, 0, r->test_dma_size);
 713        break;
 714    case ROCKER_TEST_DMA_CTRL_FILL:
 715        memset(buf, 0x96, r->test_dma_size);
 716        break;
 717    case ROCKER_TEST_DMA_CTRL_INVERT:
 718        pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
 719        for (i = 0; i < r->test_dma_size; i++) {
 720            buf[i] = ~buf[i];
 721        }
 722        break;
 723    default:
 724        DPRINTF("not test dma control val=0x%08x\n", val);
 725        goto err_out;
 726    }
 727    pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
 728
 729    rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
 730
 731err_out:
 732    g_free(buf);
 733}
 734
 735static void rocker_reset(DeviceState *dev);
 736
 737static void rocker_control(Rocker *r, uint32_t val)
 738{
 739    if (val & ROCKER_CONTROL_RESET) {
 740        rocker_reset(DEVICE(r));
 741    }
 742}
 743
 744static int rocker_pci_ring_count(Rocker *r)
 745{
 746    /* There are:
 747     * - command ring
 748     * - event ring
 749     * - tx and rx ring per each port
 750     */
 751    return 2 + (2 * r->fp_ports);
 752}
 753
 754static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
 755{
 756    hwaddr start = ROCKER_DMA_DESC_BASE;
 757    hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
 758
 759    return addr >= start && addr < end;
 760}
 761
 762static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
 763{
 764    int i;
 765    bool old_enabled;
 766    bool new_enabled;
 767    FpPort *fp_port;
 768
 769    for (i = 0; i < r->fp_ports; i++) {
 770        fp_port = r->fp_port[i];
 771        old_enabled = fp_port_enabled(fp_port);
 772        new_enabled = (new >> (i + 1)) & 0x1;
 773        if (new_enabled == old_enabled) {
 774            continue;
 775        }
 776        if (new_enabled) {
 777            fp_port_enable(r->fp_port[i]);
 778        } else {
 779            fp_port_disable(r->fp_port[i]);
 780        }
 781    }
 782}
 783
 784static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
 785{
 786    Rocker *r = opaque;
 787
 788    if (rocker_addr_is_desc_reg(r, addr)) {
 789        unsigned index = ROCKER_RING_INDEX(addr);
 790        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 791
 792        switch (offset) {
 793        case ROCKER_DMA_DESC_ADDR_OFFSET:
 794            r->lower32 = (uint64_t)val;
 795            break;
 796        case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
 797            desc_ring_set_base_addr(r->rings[index],
 798                                    ((uint64_t)val) << 32 | r->lower32);
 799            r->lower32 = 0;
 800            break;
 801        case ROCKER_DMA_DESC_SIZE_OFFSET:
 802            desc_ring_set_size(r->rings[index], val);
 803            break;
 804        case ROCKER_DMA_DESC_HEAD_OFFSET:
 805            if (desc_ring_set_head(r->rings[index], val)) {
 806                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
 807            }
 808            break;
 809        case ROCKER_DMA_DESC_CTRL_OFFSET:
 810            desc_ring_set_ctrl(r->rings[index], val);
 811            break;
 812        case ROCKER_DMA_DESC_CREDITS_OFFSET:
 813            if (desc_ring_ret_credits(r->rings[index], val)) {
 814                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
 815            }
 816            break;
 817        default:
 818            DPRINTF("not implemented dma reg write(l) addr=0x" HWADDR_FMT_plx
 819                    " val=0x%08x (ring %d, addr=0x%02x)\n",
 820                    addr, val, index, offset);
 821            break;
 822        }
 823        return;
 824    }
 825
 826    switch (addr) {
 827    case ROCKER_TEST_REG:
 828        r->test_reg = val;
 829        break;
 830    case ROCKER_TEST_REG64:
 831    case ROCKER_TEST_DMA_ADDR:
 832    case ROCKER_PORT_PHYS_ENABLE:
 833        r->lower32 = (uint64_t)val;
 834        break;
 835    case ROCKER_TEST_REG64 + 4:
 836        r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
 837        r->lower32 = 0;
 838        break;
 839    case ROCKER_TEST_IRQ:
 840        rocker_msix_irq(r, val);
 841        break;
 842    case ROCKER_TEST_DMA_SIZE:
 843        r->test_dma_size = val & 0xFFFF;
 844        break;
 845    case ROCKER_TEST_DMA_ADDR + 4:
 846        r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
 847        r->lower32 = 0;
 848        break;
 849    case ROCKER_TEST_DMA_CTRL:
 850        rocker_test_dma_ctrl(r, val);
 851        break;
 852    case ROCKER_CONTROL:
 853        rocker_control(r, val);
 854        break;
 855    case ROCKER_PORT_PHYS_ENABLE + 4:
 856        rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
 857        r->lower32 = 0;
 858        break;
 859    default:
 860        DPRINTF("not implemented write(l) addr=0x" HWADDR_FMT_plx
 861                " val=0x%08x\n", addr, val);
 862        break;
 863    }
 864}
 865
 866static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
 867{
 868    Rocker *r = opaque;
 869
 870    if (rocker_addr_is_desc_reg(r, addr)) {
 871        unsigned index = ROCKER_RING_INDEX(addr);
 872        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 873
 874        switch (offset) {
 875        case ROCKER_DMA_DESC_ADDR_OFFSET:
 876            desc_ring_set_base_addr(r->rings[index], val);
 877            break;
 878        default:
 879            DPRINTF("not implemented dma reg write(q) addr=0x" HWADDR_FMT_plx
 880                    " val=0x" HWADDR_FMT_plx " (ring %d, offset=0x%02x)\n",
 881                    addr, val, index, offset);
 882            break;
 883        }
 884        return;
 885    }
 886
 887    switch (addr) {
 888    case ROCKER_TEST_REG64:
 889        r->test_reg64 = val;
 890        break;
 891    case ROCKER_TEST_DMA_ADDR:
 892        r->test_dma_addr = val;
 893        break;
 894    case ROCKER_PORT_PHYS_ENABLE:
 895        rocker_port_phys_enable_write(r, val);
 896        break;
 897    default:
 898        DPRINTF("not implemented write(q) addr=0x" HWADDR_FMT_plx
 899                " val=0x" HWADDR_FMT_plx "\n", addr, val);
 900        break;
 901    }
 902}
 903
 904#ifdef DEBUG_ROCKER
 905#define regname(reg) case (reg): return #reg
 906static const char *rocker_reg_name(void *opaque, hwaddr addr)
 907{
 908    Rocker *r = opaque;
 909
 910    if (rocker_addr_is_desc_reg(r, addr)) {
 911        unsigned index = ROCKER_RING_INDEX(addr);
 912        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 913        static char buf[100];
 914        char ring_name[10];
 915
 916        switch (index) {
 917        case 0:
 918            sprintf(ring_name, "cmd");
 919            break;
 920        case 1:
 921            sprintf(ring_name, "event");
 922            break;
 923        default:
 924            sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
 925                    (index - 2) / 2);
 926        }
 927
 928        switch (offset) {
 929        case ROCKER_DMA_DESC_ADDR_OFFSET:
 930            sprintf(buf, "Ring[%s] ADDR", ring_name);
 931            return buf;
 932        case ROCKER_DMA_DESC_ADDR_OFFSET+4:
 933            sprintf(buf, "Ring[%s] ADDR+4", ring_name);
 934            return buf;
 935        case ROCKER_DMA_DESC_SIZE_OFFSET:
 936            sprintf(buf, "Ring[%s] SIZE", ring_name);
 937            return buf;
 938        case ROCKER_DMA_DESC_HEAD_OFFSET:
 939            sprintf(buf, "Ring[%s] HEAD", ring_name);
 940            return buf;
 941        case ROCKER_DMA_DESC_TAIL_OFFSET:
 942            sprintf(buf, "Ring[%s] TAIL", ring_name);
 943            return buf;
 944        case ROCKER_DMA_DESC_CTRL_OFFSET:
 945            sprintf(buf, "Ring[%s] CTRL", ring_name);
 946            return buf;
 947        case ROCKER_DMA_DESC_CREDITS_OFFSET:
 948            sprintf(buf, "Ring[%s] CREDITS", ring_name);
 949            return buf;
 950        default:
 951            sprintf(buf, "Ring[%s] ???", ring_name);
 952            return buf;
 953        }
 954    } else {
 955        switch (addr) {
 956            regname(ROCKER_BOGUS_REG0);
 957            regname(ROCKER_BOGUS_REG1);
 958            regname(ROCKER_BOGUS_REG2);
 959            regname(ROCKER_BOGUS_REG3);
 960            regname(ROCKER_TEST_REG);
 961            regname(ROCKER_TEST_REG64);
 962            regname(ROCKER_TEST_REG64+4);
 963            regname(ROCKER_TEST_IRQ);
 964            regname(ROCKER_TEST_DMA_ADDR);
 965            regname(ROCKER_TEST_DMA_ADDR+4);
 966            regname(ROCKER_TEST_DMA_SIZE);
 967            regname(ROCKER_TEST_DMA_CTRL);
 968            regname(ROCKER_CONTROL);
 969            regname(ROCKER_PORT_PHYS_COUNT);
 970            regname(ROCKER_PORT_PHYS_LINK_STATUS);
 971            regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
 972            regname(ROCKER_PORT_PHYS_ENABLE);
 973            regname(ROCKER_PORT_PHYS_ENABLE+4);
 974            regname(ROCKER_SWITCH_ID);
 975            regname(ROCKER_SWITCH_ID+4);
 976        }
 977    }
 978    return "???";
 979}
 980#else
 981static const char *rocker_reg_name(void *opaque, hwaddr addr)
 982{
 983    return NULL;
 984}
 985#endif
 986
 987static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
 988                              unsigned size)
 989{
 990    DPRINTF("Write %s addr " HWADDR_FMT_plx
 991            ", size %u, val " HWADDR_FMT_plx "\n",
 992            rocker_reg_name(opaque, addr), addr, size, val);
 993
 994    switch (size) {
 995    case 4:
 996        rocker_io_writel(opaque, addr, val);
 997        break;
 998    case 8:
 999        rocker_io_writeq(opaque, addr, val);
1000        break;
1001    }
1002}
1003
1004static uint64_t rocker_port_phys_link_status(Rocker *r)
1005{
1006    int i;
1007    uint64_t status = 0;
1008
1009    for (i = 0; i < r->fp_ports; i++) {
1010        FpPort *port = r->fp_port[i];
1011
1012        if (fp_port_get_link_up(port)) {
1013            status |= 1ULL << (i + 1);
1014        }
1015    }
1016    return status;
1017}
1018
1019static uint64_t rocker_port_phys_enable_read(Rocker *r)
1020{
1021    int i;
1022    uint64_t ret = 0;
1023
1024    for (i = 0; i < r->fp_ports; i++) {
1025        FpPort *port = r->fp_port[i];
1026
1027        if (fp_port_enabled(port)) {
1028            ret |= 1ULL << (i + 1);
1029        }
1030    }
1031    return ret;
1032}
1033
1034static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1035{
1036    Rocker *r = opaque;
1037    uint32_t ret;
1038
1039    if (rocker_addr_is_desc_reg(r, addr)) {
1040        unsigned index = ROCKER_RING_INDEX(addr);
1041        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1042
1043        switch (offset) {
1044        case ROCKER_DMA_DESC_ADDR_OFFSET:
1045            ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1046            break;
1047        case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1048            ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1049            break;
1050        case ROCKER_DMA_DESC_SIZE_OFFSET:
1051            ret = desc_ring_get_size(r->rings[index]);
1052            break;
1053        case ROCKER_DMA_DESC_HEAD_OFFSET:
1054            ret = desc_ring_get_head(r->rings[index]);
1055            break;
1056        case ROCKER_DMA_DESC_TAIL_OFFSET:
1057            ret = desc_ring_get_tail(r->rings[index]);
1058            break;
1059        case ROCKER_DMA_DESC_CREDITS_OFFSET:
1060            ret = desc_ring_get_credits(r->rings[index]);
1061            break;
1062        default:
1063            DPRINTF("not implemented dma reg read(l) addr=0x" HWADDR_FMT_plx
1064                    " (ring %d, addr=0x%02x)\n", addr, index, offset);
1065            ret = 0;
1066            break;
1067        }
1068        return ret;
1069    }
1070
1071    switch (addr) {
1072    case ROCKER_BOGUS_REG0:
1073    case ROCKER_BOGUS_REG1:
1074    case ROCKER_BOGUS_REG2:
1075    case ROCKER_BOGUS_REG3:
1076        ret = 0xDEADBABE;
1077        break;
1078    case ROCKER_TEST_REG:
1079        ret = r->test_reg * 2;
1080        break;
1081    case ROCKER_TEST_REG64:
1082        ret = (uint32_t)(r->test_reg64 * 2);
1083        break;
1084    case ROCKER_TEST_REG64 + 4:
1085        ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1086        break;
1087    case ROCKER_TEST_DMA_SIZE:
1088        ret = r->test_dma_size;
1089        break;
1090    case ROCKER_TEST_DMA_ADDR:
1091        ret = (uint32_t)r->test_dma_addr;
1092        break;
1093    case ROCKER_TEST_DMA_ADDR + 4:
1094        ret = (uint32_t)(r->test_dma_addr >> 32);
1095        break;
1096    case ROCKER_PORT_PHYS_COUNT:
1097        ret = r->fp_ports;
1098        break;
1099    case ROCKER_PORT_PHYS_LINK_STATUS:
1100        ret = (uint32_t)rocker_port_phys_link_status(r);
1101        break;
1102    case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1103        ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1104        break;
1105    case ROCKER_PORT_PHYS_ENABLE:
1106        ret = (uint32_t)rocker_port_phys_enable_read(r);
1107        break;
1108    case ROCKER_PORT_PHYS_ENABLE + 4:
1109        ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1110        break;
1111    case ROCKER_SWITCH_ID:
1112        ret = (uint32_t)r->switch_id;
1113        break;
1114    case ROCKER_SWITCH_ID + 4:
1115        ret = (uint32_t)(r->switch_id >> 32);
1116        break;
1117    default:
1118        DPRINTF("not implemented read(l) addr=0x" HWADDR_FMT_plx "\n", addr);
1119        ret = 0;
1120        break;
1121    }
1122    return ret;
1123}
1124
1125static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1126{
1127    Rocker *r = opaque;
1128    uint64_t ret;
1129
1130    if (rocker_addr_is_desc_reg(r, addr)) {
1131        unsigned index = ROCKER_RING_INDEX(addr);
1132        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1133
1134        switch (addr & ROCKER_DMA_DESC_MASK) {
1135        case ROCKER_DMA_DESC_ADDR_OFFSET:
1136            ret = desc_ring_get_base_addr(r->rings[index]);
1137            break;
1138        default:
1139            DPRINTF("not implemented dma reg read(q) addr=0x" HWADDR_FMT_plx
1140                    " (ring %d, addr=0x%02x)\n", addr, index, offset);
1141            ret = 0;
1142            break;
1143        }
1144        return ret;
1145    }
1146
1147    switch (addr) {
1148    case ROCKER_BOGUS_REG0:
1149    case ROCKER_BOGUS_REG2:
1150        ret = 0xDEADBABEDEADBABEULL;
1151        break;
1152    case ROCKER_TEST_REG64:
1153        ret = r->test_reg64 * 2;
1154        break;
1155    case ROCKER_TEST_DMA_ADDR:
1156        ret = r->test_dma_addr;
1157        break;
1158    case ROCKER_PORT_PHYS_LINK_STATUS:
1159        ret = rocker_port_phys_link_status(r);
1160        break;
1161    case ROCKER_PORT_PHYS_ENABLE:
1162        ret = rocker_port_phys_enable_read(r);
1163        break;
1164    case ROCKER_SWITCH_ID:
1165        ret = r->switch_id;
1166        break;
1167    default:
1168        DPRINTF("not implemented read(q) addr=0x" HWADDR_FMT_plx "\n", addr);
1169        ret = 0;
1170        break;
1171    }
1172    return ret;
1173}
1174
1175static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1176{
1177    DPRINTF("Read %s addr " HWADDR_FMT_plx ", size %u\n",
1178            rocker_reg_name(opaque, addr), addr, size);
1179
1180    switch (size) {
1181    case 4:
1182        return rocker_io_readl(opaque, addr);
1183    case 8:
1184        return rocker_io_readq(opaque, addr);
1185    }
1186
1187    return -1;
1188}
1189
1190static const MemoryRegionOps rocker_mmio_ops = {
1191    .read = rocker_mmio_read,
1192    .write = rocker_mmio_write,
1193    .endianness = DEVICE_LITTLE_ENDIAN,
1194    .valid = {
1195        .min_access_size = 4,
1196        .max_access_size = 8,
1197    },
1198    .impl = {
1199        .min_access_size = 4,
1200        .max_access_size = 8,
1201    },
1202};
1203
1204static void rocker_msix_vectors_unuse(Rocker *r,
1205                                      unsigned int num_vectors)
1206{
1207    PCIDevice *dev = PCI_DEVICE(r);
1208    int i;
1209
1210    for (i = 0; i < num_vectors; i++) {
1211        msix_vector_unuse(dev, i);
1212    }
1213}
1214
1215static void rocker_msix_vectors_use(Rocker *r, unsigned int num_vectors)
1216{
1217    PCIDevice *dev = PCI_DEVICE(r);
1218    int i;
1219
1220    for (i = 0; i < num_vectors; i++) {
1221        msix_vector_use(dev, i);
1222    }
1223}
1224
1225static int rocker_msix_init(Rocker *r, Error **errp)
1226{
1227    PCIDevice *dev = PCI_DEVICE(r);
1228    int err;
1229
1230    err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1231                    &r->msix_bar,
1232                    ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1233                    &r->msix_bar,
1234                    ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1235                    0, errp);
1236    if (err) {
1237        return err;
1238    }
1239
1240    rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1241
1242    return 0;
1243}
1244
1245static void rocker_msix_uninit(Rocker *r)
1246{
1247    PCIDevice *dev = PCI_DEVICE(r);
1248
1249    msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1250    rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1251}
1252
1253static World *rocker_world_type_by_name(Rocker *r, const char *name)
1254{
1255    int i;
1256
1257    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1258        if (strcmp(name, world_name(r->worlds[i])) == 0) {
1259            return r->worlds[i];
1260        }
1261    }
1262    return NULL;
1263}
1264
1265static void pci_rocker_realize(PCIDevice *dev, Error **errp)
1266{
1267    Rocker *r = ROCKER(dev);
1268    const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1269    const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1270    static int sw_index;
1271    int i, err = 0;
1272
1273    /* allocate worlds */
1274
1275    r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1276
1277    if (!r->world_name) {
1278        r->world_name = g_strdup(world_name(r->worlds[ROCKER_WORLD_TYPE_OF_DPA]));
1279    }
1280
1281    r->world_dflt = rocker_world_type_by_name(r, r->world_name);
1282    if (!r->world_dflt) {
1283        error_setg(errp,
1284                "invalid argument requested world %s does not exist",
1285                r->world_name);
1286        goto err_world_type_by_name;
1287    }
1288
1289    /* set up memory-mapped region at BAR0 */
1290
1291    memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1292                          "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1293    pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1294                     PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1295
1296    /* set up memory-mapped region for MSI-X */
1297
1298    memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1299                       ROCKER_PCI_MSIX_BAR_SIZE);
1300    pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1301                     PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1302
1303    /* MSI-X init */
1304
1305    err = rocker_msix_init(r, errp);
1306    if (err) {
1307        goto err_msix_init;
1308    }
1309
1310    /* validate switch properties */
1311
1312    if (!r->name) {
1313        r->name = g_strdup(TYPE_ROCKER);
1314    }
1315
1316    if (rocker_find(r->name)) {
1317        error_setg(errp, "%s already exists", r->name);
1318        goto err_duplicate;
1319    }
1320
1321    /* Rocker name is passed in port name requests to OS with the intention
1322     * that the name is used in interface names. Limit the length of the
1323     * rocker name to avoid naming problems in the OS. Also, adding the
1324     * port number as p# and unganged breakout b#, where # is at most 2
1325     * digits, so leave room for it too (-1 for string terminator, -3 for
1326     * p# and -3 for b#)
1327     */
1328#define ROCKER_IFNAMSIZ 16
1329#define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1330    if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1331        error_setg(errp,
1332                "name too long; please shorten to at most %d chars",
1333                MAX_ROCKER_NAME_LEN);
1334        goto err_name_too_long;
1335    }
1336
1337    if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1338        memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1339        r->fp_start_macaddr.a[4] += (sw_index++);
1340    }
1341
1342    if (!r->switch_id) {
1343        memcpy(&r->switch_id, &r->fp_start_macaddr,
1344               sizeof(r->fp_start_macaddr));
1345    }
1346
1347    if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1348        r->fp_ports = ROCKER_FP_PORTS_MAX;
1349    }
1350
1351    r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
1352
1353    /* Rings are ordered like this:
1354     * - command ring
1355     * - event ring
1356     * - port0 tx ring
1357     * - port0 rx ring
1358     * - port1 tx ring
1359     * - port1 rx ring
1360     * .....
1361     */
1362
1363    for (i = 0; i < rocker_pci_ring_count(r); i++) {
1364        DescRing *ring = desc_ring_alloc(r, i);
1365
1366        if (i == ROCKER_RING_CMD) {
1367            desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1368        } else if (i == ROCKER_RING_EVENT) {
1369            desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1370        } else if (i % 2 == 0) {
1371            desc_ring_set_consume(ring, tx_consume,
1372                                  ROCKER_MSIX_VEC_TX((i - 2) / 2));
1373        } else if (i % 2 == 1) {
1374            desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1375        }
1376
1377        r->rings[i] = ring;
1378    }
1379
1380    for (i = 0; i < r->fp_ports; i++) {
1381        FpPort *port =
1382            fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1383                          i, &r->fp_ports_peers[i]);
1384
1385        r->fp_port[i] = port;
1386        fp_port_set_world(port, r->world_dflt);
1387    }
1388
1389    QLIST_INSERT_HEAD(&rockers, r, next);
1390
1391    return;
1392
1393err_name_too_long:
1394err_duplicate:
1395    rocker_msix_uninit(r);
1396err_msix_init:
1397    object_unparent(OBJECT(&r->msix_bar));
1398    object_unparent(OBJECT(&r->mmio));
1399err_world_type_by_name:
1400    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1401        if (r->worlds[i]) {
1402            world_free(r->worlds[i]);
1403        }
1404    }
1405}
1406
1407static void pci_rocker_uninit(PCIDevice *dev)
1408{
1409    Rocker *r = ROCKER(dev);
1410    int i;
1411
1412    QLIST_REMOVE(r, next);
1413
1414    for (i = 0; i < r->fp_ports; i++) {
1415        FpPort *port = r->fp_port[i];
1416
1417        fp_port_free(port);
1418        r->fp_port[i] = NULL;
1419    }
1420
1421    for (i = 0; i < rocker_pci_ring_count(r); i++) {
1422        if (r->rings[i]) {
1423            desc_ring_free(r->rings[i]);
1424        }
1425    }
1426    g_free(r->rings);
1427
1428    rocker_msix_uninit(r);
1429    object_unparent(OBJECT(&r->msix_bar));
1430    object_unparent(OBJECT(&r->mmio));
1431
1432    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1433        if (r->worlds[i]) {
1434            world_free(r->worlds[i]);
1435        }
1436    }
1437    g_free(r->fp_ports_peers);
1438}
1439
1440static void rocker_reset(DeviceState *dev)
1441{
1442    Rocker *r = ROCKER(dev);
1443    int i;
1444
1445    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1446        if (r->worlds[i]) {
1447            world_reset(r->worlds[i]);
1448        }
1449    }
1450    for (i = 0; i < r->fp_ports; i++) {
1451        fp_port_reset(r->fp_port[i]);
1452        fp_port_set_world(r->fp_port[i], r->world_dflt);
1453    }
1454
1455    r->test_reg = 0;
1456    r->test_reg64 = 0;
1457    r->test_dma_addr = 0;
1458    r->test_dma_size = 0;
1459
1460    for (i = 0; i < rocker_pci_ring_count(r); i++) {
1461        desc_ring_reset(r->rings[i]);
1462    }
1463
1464    DPRINTF("Reset done\n");
1465}
1466
1467static Property rocker_properties[] = {
1468    DEFINE_PROP_STRING("name", Rocker, name),
1469    DEFINE_PROP_STRING("world", Rocker, world_name),
1470    DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
1471                        fp_start_macaddr),
1472    DEFINE_PROP_UINT64("switch_id", Rocker,
1473                       switch_id, 0),
1474    DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
1475                      fp_ports_peers, qdev_prop_netdev, NICPeers),
1476    DEFINE_PROP_END_OF_LIST(),
1477};
1478
1479static const VMStateDescription rocker_vmsd = {
1480    .name = TYPE_ROCKER,
1481    .unmigratable = 1,
1482};
1483
1484static void rocker_class_init(ObjectClass *klass, void *data)
1485{
1486    DeviceClass *dc = DEVICE_CLASS(klass);
1487    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1488
1489    k->realize = pci_rocker_realize;
1490    k->exit = pci_rocker_uninit;
1491    k->vendor_id = PCI_VENDOR_ID_REDHAT;
1492    k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1493    k->revision = ROCKER_PCI_REVISION;
1494    k->class_id = PCI_CLASS_NETWORK_OTHER;
1495    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1496    dc->desc = "Rocker Switch";
1497    dc->reset = rocker_reset;
1498    device_class_set_props(dc, rocker_properties);
1499    dc->vmsd = &rocker_vmsd;
1500}
1501
1502static const TypeInfo rocker_info = {
1503    .name          = TYPE_ROCKER,
1504    .parent        = TYPE_PCI_DEVICE,
1505    .instance_size = sizeof(Rocker),
1506    .class_init    = rocker_class_init,
1507    .interfaces = (InterfaceInfo[]) {
1508        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1509        { },
1510    },
1511};
1512
1513static void rocker_register_types(void)
1514{
1515    type_register_static(&rocker_info);
1516}
1517
1518type_init(rocker_register_types)
1519