qemu/hw/net/rocker/rocker.c
<<
>>
Prefs
   1/*
   2 * QEMU rocker switch emulation - PCI device
   3 *
   4 * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
   5 * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License as published by
   9 * the Free Software Foundation; either version 2 of the License, or
  10 * (at your option) any later version.
  11 *
  12 * This program is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 * GNU General Public License for more details.
  16 */
  17
  18#include "qemu/osdep.h"
  19#include "hw/pci/pci.h"
  20#include "hw/qdev-properties.h"
  21#include "migration/vmstate.h"
  22#include "hw/pci/msix.h"
  23#include "net/net.h"
  24#include "net/eth.h"
  25#include "qapi/error.h"
  26#include "qapi/qapi-commands-rocker.h"
  27#include "qemu/iov.h"
  28#include "qemu/module.h"
  29#include "qemu/bitops.h"
  30
  31#include "rocker.h"
  32#include "rocker_hw.h"
  33#include "rocker_fp.h"
  34#include "rocker_desc.h"
  35#include "rocker_tlv.h"
  36#include "rocker_world.h"
  37#include "rocker_of_dpa.h"
  38
/*
 * Per-device state of one emulated rocker switch.
 */
struct rocker {
    /* private */
    PCIDevice parent_obj;
    /* public */

    MemoryRegion mmio;
    MemoryRegion msix_bar;

    /* switch configuration */
    char *name;                  /* switch name */
    char *world_name;            /* world name */
    uint32_t fp_ports;           /* front-panel port count */
    NICPeers *fp_ports_peers;
    MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
    uint64_t switch_id;          /* switch id */

    /* front-panel ports */
    FpPort *fp_port[ROCKER_FP_PORTS_MAX];

    /* register backings */
    uint32_t test_reg;
    uint64_t test_reg64;
    dma_addr_t test_dma_addr;
    uint32_t test_dma_size;
    /*
     * A 32-bit write to the low half of a 64-bit register is latched here
     * and combined with the value of the following write to the high half.
     */
    uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */

    /* desc rings */
    DescRing **rings;

    /* switch worlds */
    World *worlds[ROCKER_WORLD_TYPE_MAX];
    World *world_dflt;

    /* linkage on the file-scope "rockers" list */
    QLIST_ENTRY(rocker) next;
};
  74
/* Type name string used by the ROCKER() cast below. */
#define TYPE_ROCKER "rocker"

/* Checked cast of an object pointer to Rocker via OBJECT_CHECK(). */
#define ROCKER(obj) \
    OBJECT_CHECK(Rocker, (obj), TYPE_ROCKER)

/* List of rocker devices; rocker_find() walks it to look a switch up by name. */
static QLIST_HEAD(, rocker) rockers;
  81
  82Rocker *rocker_find(const char *name)
  83{
  84    Rocker *r;
  85
  86    QLIST_FOREACH(r, &rockers, next)
  87        if (strcmp(r->name, name) == 0) {
  88            return r;
  89        }
  90
  91    return NULL;
  92}
  93
  94World *rocker_get_world(Rocker *r, enum rocker_world_type type)
  95{
  96    if (type < ROCKER_WORLD_TYPE_MAX) {
  97        return r->worlds[type];
  98    }
  99    return NULL;
 100}
 101
 102RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
 103{
 104    RockerSwitch *rocker;
 105    Rocker *r;
 106
 107    r = rocker_find(name);
 108    if (!r) {
 109        error_setg(errp, "rocker %s not found", name);
 110        return NULL;
 111    }
 112
 113    rocker = g_new0(RockerSwitch, 1);
 114    rocker->name = g_strdup(r->name);
 115    rocker->id = r->switch_id;
 116    rocker->ports = r->fp_ports;
 117
 118    return rocker;
 119}
 120
 121RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
 122{
 123    RockerPortList *list = NULL;
 124    Rocker *r;
 125    int i;
 126
 127    r = rocker_find(name);
 128    if (!r) {
 129        error_setg(errp, "rocker %s not found", name);
 130        return NULL;
 131    }
 132
 133    for (i = r->fp_ports - 1; i >= 0; i--) {
 134        RockerPortList *info = g_malloc0(sizeof(*info));
 135        info->value = g_malloc0(sizeof(*info->value));
 136        struct fp_port *port = r->fp_port[i];
 137
 138        fp_port_get_info(port, info);
 139        info->next = list;
 140        list = info;
 141    }
 142
 143    return list;
 144}
 145
 146uint32_t rocker_fp_ports(Rocker *r)
 147{
 148    return r->fp_ports;
 149}
 150
 151static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
 152                                            DescRing *ring)
 153{
 154    return (desc_ring_index(ring) - 2) / 2 + 1;
 155}
 156
 157static int tx_consume(Rocker *r, DescInfo *info)
 158{
 159    PCIDevice *dev = PCI_DEVICE(r);
 160    char *buf = desc_get_buf(info, true);
 161    RockerTlv *tlv_frag;
 162    RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
 163    struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
 164    uint32_t pport;
 165    uint32_t port;
 166    uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
 167    uint16_t tx_l3_csum_off = 0;
 168    uint16_t tx_tso_mss = 0;
 169    uint16_t tx_tso_hdr_len = 0;
 170    int iovcnt = 0;
 171    int err = ROCKER_OK;
 172    int rem;
 173    int i;
 174
 175    if (!buf) {
 176        return -ROCKER_ENXIO;
 177    }
 178
 179    rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
 180
 181    if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
 182        return -ROCKER_EINVAL;
 183    }
 184
 185    pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
 186    if (!fp_port_from_pport(pport, &port)) {
 187        return -ROCKER_EINVAL;
 188    }
 189
 190    if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
 191        tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
 192    }
 193
 194    switch (tx_offload) {
 195    case ROCKER_TX_OFFLOAD_L3_CSUM:
 196        if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
 197            return -ROCKER_EINVAL;
 198        }
 199        break;
 200    case ROCKER_TX_OFFLOAD_TSO:
 201        if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
 202            !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
 203            return -ROCKER_EINVAL;
 204        }
 205        break;
 206    }
 207
 208    if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
 209        tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
 210    }
 211
 212    if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
 213        tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
 214    }
 215
 216    if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
 217        tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
 218    }
 219
 220    rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
 221        hwaddr frag_addr;
 222        uint16_t frag_len;
 223
 224        if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
 225            err = -ROCKER_EINVAL;
 226            goto err_bad_attr;
 227        }
 228
 229        rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
 230
 231        if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
 232            !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
 233            err = -ROCKER_EINVAL;
 234            goto err_bad_attr;
 235        }
 236
 237        frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
 238        frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
 239
 240        if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
 241            goto err_too_many_frags;
 242        }
 243        iov[iovcnt].iov_len = frag_len;
 244        iov[iovcnt].iov_base = g_malloc(frag_len);
 245
 246        pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
 247                     iov[iovcnt].iov_len);
 248
 249        iovcnt++;
 250    }
 251
 252    if (iovcnt) {
 253        /* XXX perform Tx offloads */
 254        /* XXX   silence compiler for now */
 255        tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
 256    }
 257
 258    err = fp_port_eg(r->fp_port[port], iov, iovcnt);
 259
 260err_too_many_frags:
 261err_bad_attr:
 262    for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
 263        g_free(iov[i].iov_base);
 264    }
 265
 266    return err;
 267}
 268
 269static int cmd_get_port_settings(Rocker *r,
 270                                 DescInfo *info, char *buf,
 271                                 RockerTlv *cmd_info_tlv)
 272{
 273    RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
 274    RockerTlv *nest;
 275    FpPort *fp_port;
 276    uint32_t pport;
 277    uint32_t port;
 278    uint32_t speed;
 279    uint8_t duplex;
 280    uint8_t autoneg;
 281    uint8_t learning;
 282    char *phys_name;
 283    MACAddr macaddr;
 284    enum rocker_world_type mode;
 285    size_t tlv_size;
 286    int pos;
 287    int err;
 288
 289    rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
 290                            cmd_info_tlv);
 291
 292    if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
 293        return -ROCKER_EINVAL;
 294    }
 295
 296    pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
 297    if (!fp_port_from_pport(pport, &port)) {
 298        return -ROCKER_EINVAL;
 299    }
 300    fp_port = r->fp_port[port];
 301
 302    err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
 303    if (err) {
 304        return err;
 305    }
 306
 307    fp_port_get_macaddr(fp_port, &macaddr);
 308    mode = world_type(fp_port_get_world(fp_port));
 309    learning = fp_port_get_learning(fp_port);
 310    phys_name = fp_port_get_name(fp_port);
 311
 312    tlv_size = rocker_tlv_total_size(0) +                 /* nest */
 313               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 314               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
 315               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
 316               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
 317               rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
 318               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
 319               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
 320               rocker_tlv_total_size(strlen(phys_name));
 321
 322    if (tlv_size > desc_buf_size(info)) {
 323        return -ROCKER_EMSGSIZE;
 324    }
 325
 326    pos = 0;
 327    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
 328    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
 329    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
 330    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
 331    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
 332    rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
 333                   sizeof(macaddr.a), macaddr.a);
 334    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
 335    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
 336                      learning);
 337    rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
 338                   strlen(phys_name), phys_name);
 339    rocker_tlv_nest_end(buf, &pos, nest);
 340
 341    return desc_set_buf(info, tlv_size);
 342}
 343
 344static int cmd_set_port_settings(Rocker *r,
 345                                 RockerTlv *cmd_info_tlv)
 346{
 347    RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
 348    FpPort *fp_port;
 349    uint32_t pport;
 350    uint32_t port;
 351    uint32_t speed;
 352    uint8_t duplex;
 353    uint8_t autoneg;
 354    uint8_t learning;
 355    MACAddr macaddr;
 356    enum rocker_world_type mode;
 357    int err;
 358
 359    rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
 360                            cmd_info_tlv);
 361
 362    if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
 363        return -ROCKER_EINVAL;
 364    }
 365
 366    pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
 367    if (!fp_port_from_pport(pport, &port)) {
 368        return -ROCKER_EINVAL;
 369    }
 370    fp_port = r->fp_port[port];
 371
 372    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
 373        tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
 374        tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
 375
 376        speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
 377        duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
 378        autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
 379
 380        err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
 381        if (err) {
 382            return err;
 383        }
 384    }
 385
 386    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
 387        if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
 388            sizeof(macaddr.a)) {
 389            return -ROCKER_EINVAL;
 390        }
 391        memcpy(macaddr.a,
 392               rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
 393               sizeof(macaddr.a));
 394        fp_port_set_macaddr(fp_port, &macaddr);
 395    }
 396
 397    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
 398        mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
 399        if (mode >= ROCKER_WORLD_TYPE_MAX) {
 400            return -ROCKER_EINVAL;
 401        }
 402        /* We don't support world change. */
 403        if (!fp_port_check_world(fp_port, r->worlds[mode])) {
 404            return -ROCKER_EINVAL;
 405        }
 406    }
 407
 408    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
 409        learning =
 410            rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
 411        fp_port_set_learning(fp_port, learning);
 412    }
 413
 414    return ROCKER_OK;
 415}
 416
 417static int cmd_consume(Rocker *r, DescInfo *info)
 418{
 419    char *buf = desc_get_buf(info, false);
 420    RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
 421    RockerTlv *info_tlv;
 422    World *world;
 423    uint16_t cmd;
 424    int err;
 425
 426    if (!buf) {
 427        return -ROCKER_ENXIO;
 428    }
 429
 430    rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
 431
 432    if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
 433        return -ROCKER_EINVAL;
 434    }
 435
 436    cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
 437    info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
 438
 439    /* This might be reworked to something like this:
 440     * Every world will have an array of command handlers from
 441     * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is
 442     * up to each world to implement whatever command it want.
 443     * It can reference "generic" commands as cmd_set_port_settings or
 444     * cmd_get_port_settings
 445     */
 446
 447    switch (cmd) {
 448    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
 449    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
 450    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
 451    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
 452    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
 453    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
 454    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
 455    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
 456        world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
 457        err = world_do_cmd(world, info, buf, cmd, info_tlv);
 458        break;
 459    case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
 460        err = cmd_get_port_settings(r, info, buf, info_tlv);
 461        break;
 462    case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
 463        err = cmd_set_port_settings(r, info_tlv);
 464        break;
 465    default:
 466        err = -ROCKER_EINVAL;
 467        break;
 468    }
 469
 470    return err;
 471}
 472
 473static void rocker_msix_irq(Rocker *r, unsigned vector)
 474{
 475    PCIDevice *dev = PCI_DEVICE(r);
 476
 477    DPRINTF("MSI-X notify request for vector %d\n", vector);
 478    if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
 479        DPRINTF("incorrect vector %d\n", vector);
 480        return;
 481    }
 482    msix_notify(dev, vector);
 483}
 484
 485int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
 486{
 487    DescRing *ring = r->rings[ROCKER_RING_EVENT];
 488    DescInfo *info = desc_ring_fetch_desc(ring);
 489    RockerTlv *nest;
 490    char *buf;
 491    size_t tlv_size;
 492    int pos;
 493    int err;
 494
 495    if (!info) {
 496        return -ROCKER_ENOBUFS;
 497    }
 498
 499    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
 500               rocker_tlv_total_size(0) +                 /* nest */
 501               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 502               rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
 503
 504    if (tlv_size > desc_buf_size(info)) {
 505        err = -ROCKER_EMSGSIZE;
 506        goto err_too_big;
 507    }
 508
 509    buf = desc_get_buf(info, false);
 510    if (!buf) {
 511        err = -ROCKER_ENOMEM;
 512        goto err_no_mem;
 513    }
 514
 515    pos = 0;
 516    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
 517                        ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
 518    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
 519    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
 520    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
 521                      link_up ? 1 : 0);
 522    rocker_tlv_nest_end(buf, &pos, nest);
 523
 524    err = desc_set_buf(info, tlv_size);
 525
 526err_too_big:
 527err_no_mem:
 528    if (desc_ring_post_desc(ring, err)) {
 529        rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
 530    }
 531
 532    return err;
 533}
 534
 535int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
 536                               uint16_t vlan_id)
 537{
 538    DescRing *ring = r->rings[ROCKER_RING_EVENT];
 539    DescInfo *info;
 540    FpPort *fp_port;
 541    uint32_t port;
 542    RockerTlv *nest;
 543    char *buf;
 544    size_t tlv_size;
 545    int pos;
 546    int err;
 547
 548    if (!fp_port_from_pport(pport, &port)) {
 549        return -ROCKER_EINVAL;
 550    }
 551    fp_port = r->fp_port[port];
 552    if (!fp_port_get_learning(fp_port)) {
 553        return ROCKER_OK;
 554    }
 555
 556    info = desc_ring_fetch_desc(ring);
 557    if (!info) {
 558        return -ROCKER_ENOBUFS;
 559    }
 560
 561    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
 562               rocker_tlv_total_size(0) +                 /* nest */
 563               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 564               rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
 565               rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
 566
 567    if (tlv_size > desc_buf_size(info)) {
 568        err = -ROCKER_EMSGSIZE;
 569        goto err_too_big;
 570    }
 571
 572    buf = desc_get_buf(info, false);
 573    if (!buf) {
 574        err = -ROCKER_ENOMEM;
 575        goto err_no_mem;
 576    }
 577
 578    pos = 0;
 579    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
 580                        ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
 581    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
 582    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
 583    rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
 584    rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
 585    rocker_tlv_nest_end(buf, &pos, nest);
 586
 587    err = desc_set_buf(info, tlv_size);
 588
 589err_too_big:
 590err_no_mem:
 591    if (desc_ring_post_desc(ring, err)) {
 592        rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
 593    }
 594
 595    return err;
 596}
 597
 598static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
 599                                                     uint32_t pport)
 600{
 601    return r->rings[(pport - 1) * 2 + 3];
 602}
 603
 604int rx_produce(World *world, uint32_t pport,
 605               const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
 606{
 607    Rocker *r = world_rocker(world);
 608    PCIDevice *dev = (PCIDevice *)r;
 609    DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
 610    DescInfo *info = desc_ring_fetch_desc(ring);
 611    char *data;
 612    size_t data_size = iov_size(iov, iovcnt);
 613    char *buf;
 614    uint16_t rx_flags = 0;
 615    uint16_t rx_csum = 0;
 616    size_t tlv_size;
 617    RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
 618    hwaddr frag_addr;
 619    uint16_t frag_max_len;
 620    int pos;
 621    int err;
 622
 623    if (!info) {
 624        return -ROCKER_ENOBUFS;
 625    }
 626
 627    buf = desc_get_buf(info, false);
 628    if (!buf) {
 629        err = -ROCKER_ENXIO;
 630        goto out;
 631    }
 632    rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
 633
 634    if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
 635        !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
 636        err = -ROCKER_EINVAL;
 637        goto out;
 638    }
 639
 640    frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
 641    frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
 642
 643    if (data_size > frag_max_len) {
 644        err = -ROCKER_EMSGSIZE;
 645        goto out;
 646    }
 647
 648    if (copy_to_cpu) {
 649        rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
 650    }
 651
 652    /* XXX calc rx flags/csum */
 653
 654    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
 655               rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */
 656               rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
 657               rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
 658               rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
 659
 660    if (tlv_size > desc_buf_size(info)) {
 661        err = -ROCKER_EMSGSIZE;
 662        goto out;
 663    }
 664
 665    /* TODO:
 666     * iov dma write can be optimized in similar way e1000 does it in
 667     * e1000_receive_iov. But maybe if would make sense to introduce
 668     * generic helper iov_dma_write.
 669     */
 670
 671    data = g_malloc(data_size);
 672
 673    iov_to_buf(iov, iovcnt, 0, data, data_size);
 674    pci_dma_write(dev, frag_addr, data, data_size);
 675    g_free(data);
 676
 677    pos = 0;
 678    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
 679    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
 680    rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
 681    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
 682    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
 683
 684    err = desc_set_buf(info, tlv_size);
 685
 686out:
 687    if (desc_ring_post_desc(ring, err)) {
 688        rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
 689    }
 690
 691    return err;
 692}
 693
 694int rocker_port_eg(Rocker *r, uint32_t pport,
 695                   const struct iovec *iov, int iovcnt)
 696{
 697    FpPort *fp_port;
 698    uint32_t port;
 699
 700    if (!fp_port_from_pport(pport, &port)) {
 701        return -ROCKER_EINVAL;
 702    }
 703
 704    fp_port = r->fp_port[port];
 705
 706    return fp_port_eg(fp_port, iov, iovcnt);
 707}
 708
 709static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
 710{
 711    PCIDevice *dev = PCI_DEVICE(r);
 712    char *buf;
 713    int i;
 714
 715    buf = g_malloc(r->test_dma_size);
 716
 717    switch (val) {
 718    case ROCKER_TEST_DMA_CTRL_CLEAR:
 719        memset(buf, 0, r->test_dma_size);
 720        break;
 721    case ROCKER_TEST_DMA_CTRL_FILL:
 722        memset(buf, 0x96, r->test_dma_size);
 723        break;
 724    case ROCKER_TEST_DMA_CTRL_INVERT:
 725        pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
 726        for (i = 0; i < r->test_dma_size; i++) {
 727            buf[i] = ~buf[i];
 728        }
 729        break;
 730    default:
 731        DPRINTF("not test dma control val=0x%08x\n", val);
 732        goto err_out;
 733    }
 734    pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
 735
 736    rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
 737
 738err_out:
 739    g_free(buf);
 740}
 741
 742static void rocker_reset(DeviceState *dev);
 743
 744static void rocker_control(Rocker *r, uint32_t val)
 745{
 746    if (val & ROCKER_CONTROL_RESET) {
 747        rocker_reset(DEVICE(r));
 748    }
 749}
 750
 751static int rocker_pci_ring_count(Rocker *r)
 752{
 753    /* There are:
 754     * - command ring
 755     * - event ring
 756     * - tx and rx ring per each port
 757     */
 758    return 2 + (2 * r->fp_ports);
 759}
 760
 761static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
 762{
 763    hwaddr start = ROCKER_DMA_DESC_BASE;
 764    hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
 765
 766    return addr >= start && addr < end;
 767}
 768
 769static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
 770{
 771    int i;
 772    bool old_enabled;
 773    bool new_enabled;
 774    FpPort *fp_port;
 775
 776    for (i = 0; i < r->fp_ports; i++) {
 777        fp_port = r->fp_port[i];
 778        old_enabled = fp_port_enabled(fp_port);
 779        new_enabled = (new >> (i + 1)) & 0x1;
 780        if (new_enabled == old_enabled) {
 781            continue;
 782        }
 783        if (new_enabled) {
 784            fp_port_enable(r->fp_port[i]);
 785        } else {
 786            fp_port_disable(r->fp_port[i]);
 787        }
 788    }
 789}
 790
 791static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
 792{
 793    Rocker *r = opaque;
 794
 795    if (rocker_addr_is_desc_reg(r, addr)) {
 796        unsigned index = ROCKER_RING_INDEX(addr);
 797        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 798
 799        switch (offset) {
 800        case ROCKER_DMA_DESC_ADDR_OFFSET:
 801            r->lower32 = (uint64_t)val;
 802            break;
 803        case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
 804            desc_ring_set_base_addr(r->rings[index],
 805                                    ((uint64_t)val) << 32 | r->lower32);
 806            r->lower32 = 0;
 807            break;
 808        case ROCKER_DMA_DESC_SIZE_OFFSET:
 809            desc_ring_set_size(r->rings[index], val);
 810            break;
 811        case ROCKER_DMA_DESC_HEAD_OFFSET:
 812            if (desc_ring_set_head(r->rings[index], val)) {
 813                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
 814            }
 815            break;
 816        case ROCKER_DMA_DESC_CTRL_OFFSET:
 817            desc_ring_set_ctrl(r->rings[index], val);
 818            break;
 819        case ROCKER_DMA_DESC_CREDITS_OFFSET:
 820            if (desc_ring_ret_credits(r->rings[index], val)) {
 821                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
 822            }
 823            break;
 824        default:
 825            DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
 826                    " val=0x%08x (ring %d, addr=0x%02x)\n",
 827                    addr, val, index, offset);
 828            break;
 829        }
 830        return;
 831    }
 832
 833    switch (addr) {
 834    case ROCKER_TEST_REG:
 835        r->test_reg = val;
 836        break;
 837    case ROCKER_TEST_REG64:
 838    case ROCKER_TEST_DMA_ADDR:
 839    case ROCKER_PORT_PHYS_ENABLE:
 840        r->lower32 = (uint64_t)val;
 841        break;
 842    case ROCKER_TEST_REG64 + 4:
 843        r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
 844        r->lower32 = 0;
 845        break;
 846    case ROCKER_TEST_IRQ:
 847        rocker_msix_irq(r, val);
 848        break;
 849    case ROCKER_TEST_DMA_SIZE:
 850        r->test_dma_size = val & 0xFFFF;
 851        break;
 852    case ROCKER_TEST_DMA_ADDR + 4:
 853        r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
 854        r->lower32 = 0;
 855        break;
 856    case ROCKER_TEST_DMA_CTRL:
 857        rocker_test_dma_ctrl(r, val);
 858        break;
 859    case ROCKER_CONTROL:
 860        rocker_control(r, val);
 861        break;
 862    case ROCKER_PORT_PHYS_ENABLE + 4:
 863        rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
 864        r->lower32 = 0;
 865        break;
 866    default:
 867        DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
 868                " val=0x%08x\n", addr, val);
 869        break;
 870    }
 871}
 872
 873static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
 874{
 875    Rocker *r = opaque;
 876
 877    if (rocker_addr_is_desc_reg(r, addr)) {
 878        unsigned index = ROCKER_RING_INDEX(addr);
 879        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 880
 881        switch (offset) {
 882        case ROCKER_DMA_DESC_ADDR_OFFSET:
 883            desc_ring_set_base_addr(r->rings[index], val);
 884            break;
 885        default:
 886            DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
 887                    " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
 888                    addr, val, index, offset);
 889            break;
 890        }
 891        return;
 892    }
 893
 894    switch (addr) {
 895    case ROCKER_TEST_REG64:
 896        r->test_reg64 = val;
 897        break;
 898    case ROCKER_TEST_DMA_ADDR:
 899        r->test_dma_addr = val;
 900        break;
 901    case ROCKER_PORT_PHYS_ENABLE:
 902        rocker_port_phys_enable_write(r, val);
 903        break;
 904    default:
 905        DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
 906                " val=0x" TARGET_FMT_plx "\n", addr, val);
 907        break;
 908    }
 909}
 910
 911#ifdef DEBUG_ROCKER
 912#define regname(reg) case (reg): return #reg
 913static const char *rocker_reg_name(void *opaque, hwaddr addr)
 914{
 915    Rocker *r = opaque;
 916
 917    if (rocker_addr_is_desc_reg(r, addr)) {
 918        unsigned index = ROCKER_RING_INDEX(addr);
 919        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 920        static char buf[100];
 921        char ring_name[10];
 922
 923        switch (index) {
 924        case 0:
 925            sprintf(ring_name, "cmd");
 926            break;
 927        case 1:
 928            sprintf(ring_name, "event");
 929            break;
 930        default:
 931            sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
 932                    (index - 2) / 2);
 933        }
 934
 935        switch (offset) {
 936        case ROCKER_DMA_DESC_ADDR_OFFSET:
 937            sprintf(buf, "Ring[%s] ADDR", ring_name);
 938            return buf;
 939        case ROCKER_DMA_DESC_ADDR_OFFSET+4:
 940            sprintf(buf, "Ring[%s] ADDR+4", ring_name);
 941            return buf;
 942        case ROCKER_DMA_DESC_SIZE_OFFSET:
 943            sprintf(buf, "Ring[%s] SIZE", ring_name);
 944            return buf;
 945        case ROCKER_DMA_DESC_HEAD_OFFSET:
 946            sprintf(buf, "Ring[%s] HEAD", ring_name);
 947            return buf;
 948        case ROCKER_DMA_DESC_TAIL_OFFSET:
 949            sprintf(buf, "Ring[%s] TAIL", ring_name);
 950            return buf;
 951        case ROCKER_DMA_DESC_CTRL_OFFSET:
 952            sprintf(buf, "Ring[%s] CTRL", ring_name);
 953            return buf;
 954        case ROCKER_DMA_DESC_CREDITS_OFFSET:
 955            sprintf(buf, "Ring[%s] CREDITS", ring_name);
 956            return buf;
 957        default:
 958            sprintf(buf, "Ring[%s] ???", ring_name);
 959            return buf;
 960        }
 961    } else {
 962        switch (addr) {
 963            regname(ROCKER_BOGUS_REG0);
 964            regname(ROCKER_BOGUS_REG1);
 965            regname(ROCKER_BOGUS_REG2);
 966            regname(ROCKER_BOGUS_REG3);
 967            regname(ROCKER_TEST_REG);
 968            regname(ROCKER_TEST_REG64);
 969            regname(ROCKER_TEST_REG64+4);
 970            regname(ROCKER_TEST_IRQ);
 971            regname(ROCKER_TEST_DMA_ADDR);
 972            regname(ROCKER_TEST_DMA_ADDR+4);
 973            regname(ROCKER_TEST_DMA_SIZE);
 974            regname(ROCKER_TEST_DMA_CTRL);
 975            regname(ROCKER_CONTROL);
 976            regname(ROCKER_PORT_PHYS_COUNT);
 977            regname(ROCKER_PORT_PHYS_LINK_STATUS);
 978            regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
 979            regname(ROCKER_PORT_PHYS_ENABLE);
 980            regname(ROCKER_PORT_PHYS_ENABLE+4);
 981            regname(ROCKER_SWITCH_ID);
 982            regname(ROCKER_SWITCH_ID+4);
 983        }
 984    }
 985    return "???";
 986}
 987#else
 988static const char *rocker_reg_name(void *opaque, hwaddr addr)
 989{
 990    return NULL;
 991}
 992#endif
 993
 994static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
 995                              unsigned size)
 996{
 997    DPRINTF("Write %s addr " TARGET_FMT_plx
 998            ", size %u, val " TARGET_FMT_plx "\n",
 999            rocker_reg_name(opaque, addr), addr, size, val);
1000
1001    switch (size) {
1002    case 4:
1003        rocker_io_writel(opaque, addr, val);
1004        break;
1005    case 8:
1006        rocker_io_writeq(opaque, addr, val);
1007        break;
1008    }
1009}
1010
1011static uint64_t rocker_port_phys_link_status(Rocker *r)
1012{
1013    int i;
1014    uint64_t status = 0;
1015
1016    for (i = 0; i < r->fp_ports; i++) {
1017        FpPort *port = r->fp_port[i];
1018
1019        if (fp_port_get_link_up(port)) {
1020            status |= 1 << (i + 1);
1021        }
1022    }
1023    return status;
1024}
1025
1026static uint64_t rocker_port_phys_enable_read(Rocker *r)
1027{
1028    int i;
1029    uint64_t ret = 0;
1030
1031    for (i = 0; i < r->fp_ports; i++) {
1032        FpPort *port = r->fp_port[i];
1033
1034        if (fp_port_enabled(port)) {
1035            ret |= 1 << (i + 1);
1036        }
1037    }
1038    return ret;
1039}
1040
1041static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1042{
1043    Rocker *r = opaque;
1044    uint32_t ret;
1045
1046    if (rocker_addr_is_desc_reg(r, addr)) {
1047        unsigned index = ROCKER_RING_INDEX(addr);
1048        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1049
1050        switch (offset) {
1051        case ROCKER_DMA_DESC_ADDR_OFFSET:
1052            ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1053            break;
1054        case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1055            ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1056            break;
1057        case ROCKER_DMA_DESC_SIZE_OFFSET:
1058            ret = desc_ring_get_size(r->rings[index]);
1059            break;
1060        case ROCKER_DMA_DESC_HEAD_OFFSET:
1061            ret = desc_ring_get_head(r->rings[index]);
1062            break;
1063        case ROCKER_DMA_DESC_TAIL_OFFSET:
1064            ret = desc_ring_get_tail(r->rings[index]);
1065            break;
1066        case ROCKER_DMA_DESC_CREDITS_OFFSET:
1067            ret = desc_ring_get_credits(r->rings[index]);
1068            break;
1069        default:
1070            DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
1071                    " (ring %d, addr=0x%02x)\n", addr, index, offset);
1072            ret = 0;
1073            break;
1074        }
1075        return ret;
1076    }
1077
1078    switch (addr) {
1079    case ROCKER_BOGUS_REG0:
1080    case ROCKER_BOGUS_REG1:
1081    case ROCKER_BOGUS_REG2:
1082    case ROCKER_BOGUS_REG3:
1083        ret = 0xDEADBABE;
1084        break;
1085    case ROCKER_TEST_REG:
1086        ret = r->test_reg * 2;
1087        break;
1088    case ROCKER_TEST_REG64:
1089        ret = (uint32_t)(r->test_reg64 * 2);
1090        break;
1091    case ROCKER_TEST_REG64 + 4:
1092        ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1093        break;
1094    case ROCKER_TEST_DMA_SIZE:
1095        ret = r->test_dma_size;
1096        break;
1097    case ROCKER_TEST_DMA_ADDR:
1098        ret = (uint32_t)r->test_dma_addr;
1099        break;
1100    case ROCKER_TEST_DMA_ADDR + 4:
1101        ret = (uint32_t)(r->test_dma_addr >> 32);
1102        break;
1103    case ROCKER_PORT_PHYS_COUNT:
1104        ret = r->fp_ports;
1105        break;
1106    case ROCKER_PORT_PHYS_LINK_STATUS:
1107        ret = (uint32_t)rocker_port_phys_link_status(r);
1108        break;
1109    case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1110        ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1111        break;
1112    case ROCKER_PORT_PHYS_ENABLE:
1113        ret = (uint32_t)rocker_port_phys_enable_read(r);
1114        break;
1115    case ROCKER_PORT_PHYS_ENABLE + 4:
1116        ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1117        break;
1118    case ROCKER_SWITCH_ID:
1119        ret = (uint32_t)r->switch_id;
1120        break;
1121    case ROCKER_SWITCH_ID + 4:
1122        ret = (uint32_t)(r->switch_id >> 32);
1123        break;
1124    default:
1125        DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1126        ret = 0;
1127        break;
1128    }
1129    return ret;
1130}
1131
1132static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1133{
1134    Rocker *r = opaque;
1135    uint64_t ret;
1136
1137    if (rocker_addr_is_desc_reg(r, addr)) {
1138        unsigned index = ROCKER_RING_INDEX(addr);
1139        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1140
1141        switch (addr & ROCKER_DMA_DESC_MASK) {
1142        case ROCKER_DMA_DESC_ADDR_OFFSET:
1143            ret = desc_ring_get_base_addr(r->rings[index]);
1144            break;
1145        default:
1146            DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1147                    " (ring %d, addr=0x%02x)\n", addr, index, offset);
1148            ret = 0;
1149            break;
1150        }
1151        return ret;
1152    }
1153
1154    switch (addr) {
1155    case ROCKER_BOGUS_REG0:
1156    case ROCKER_BOGUS_REG2:
1157        ret = 0xDEADBABEDEADBABEULL;
1158        break;
1159    case ROCKER_TEST_REG64:
1160        ret = r->test_reg64 * 2;
1161        break;
1162    case ROCKER_TEST_DMA_ADDR:
1163        ret = r->test_dma_addr;
1164        break;
1165    case ROCKER_PORT_PHYS_LINK_STATUS:
1166        ret = rocker_port_phys_link_status(r);
1167        break;
1168    case ROCKER_PORT_PHYS_ENABLE:
1169        ret = rocker_port_phys_enable_read(r);
1170        break;
1171    case ROCKER_SWITCH_ID:
1172        ret = r->switch_id;
1173        break;
1174    default:
1175        DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1176        ret = 0;
1177        break;
1178    }
1179    return ret;
1180}
1181
1182static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1183{
1184    DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1185            rocker_reg_name(opaque, addr), addr, size);
1186
1187    switch (size) {
1188    case 4:
1189        return rocker_io_readl(opaque, addr);
1190    case 8:
1191        return rocker_io_readq(opaque, addr);
1192    }
1193
1194    return -1;
1195}
1196
1197static const MemoryRegionOps rocker_mmio_ops = {
1198    .read = rocker_mmio_read,
1199    .write = rocker_mmio_write,
1200    .endianness = DEVICE_LITTLE_ENDIAN,
1201    .valid = {
1202        .min_access_size = 4,
1203        .max_access_size = 8,
1204    },
1205    .impl = {
1206        .min_access_size = 4,
1207        .max_access_size = 8,
1208    },
1209};
1210
1211static void rocker_msix_vectors_unuse(Rocker *r,
1212                                      unsigned int num_vectors)
1213{
1214    PCIDevice *dev = PCI_DEVICE(r);
1215    int i;
1216
1217    for (i = 0; i < num_vectors; i++) {
1218        msix_vector_unuse(dev, i);
1219    }
1220}
1221
1222static int rocker_msix_vectors_use(Rocker *r,
1223                                   unsigned int num_vectors)
1224{
1225    PCIDevice *dev = PCI_DEVICE(r);
1226    int err;
1227    int i;
1228
1229    for (i = 0; i < num_vectors; i++) {
1230        err = msix_vector_use(dev, i);
1231        if (err) {
1232            goto rollback;
1233        }
1234    }
1235    return 0;
1236
1237rollback:
1238    rocker_msix_vectors_unuse(r, i);
1239    return err;
1240}
1241
1242static int rocker_msix_init(Rocker *r, Error **errp)
1243{
1244    PCIDevice *dev = PCI_DEVICE(r);
1245    int err;
1246
1247    err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1248                    &r->msix_bar,
1249                    ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1250                    &r->msix_bar,
1251                    ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1252                    0, errp);
1253    if (err) {
1254        return err;
1255    }
1256
1257    err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1258    if (err) {
1259        goto err_msix_vectors_use;
1260    }
1261
1262    return 0;
1263
1264err_msix_vectors_use:
1265    msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1266    return err;
1267}
1268
1269static void rocker_msix_uninit(Rocker *r)
1270{
1271    PCIDevice *dev = PCI_DEVICE(r);
1272
1273    msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1274    rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1275}
1276
1277static World *rocker_world_type_by_name(Rocker *r, const char *name)
1278{
1279    int i;
1280
1281    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1282        if (strcmp(name, world_name(r->worlds[i])) == 0) {
1283            return r->worlds[i];
1284        }
1285    }
1286    return NULL;
1287}
1288
1289static void pci_rocker_realize(PCIDevice *dev, Error **errp)
1290{
1291    Rocker *r = ROCKER(dev);
1292    const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1293    const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1294    static int sw_index;
1295    int i, err = 0;
1296
1297    /* allocate worlds */
1298
1299    r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1300
1301    if (!r->world_name) {
1302        r->world_name = g_strdup(world_name(r->worlds[ROCKER_WORLD_TYPE_OF_DPA]));
1303    }
1304
1305    r->world_dflt = rocker_world_type_by_name(r, r->world_name);
1306    if (!r->world_dflt) {
1307        error_setg(errp,
1308                "invalid argument requested world %s does not exist",
1309                r->world_name);
1310        goto err_world_type_by_name;
1311    }
1312
1313    /* set up memory-mapped region at BAR0 */
1314
1315    memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1316                          "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1317    pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1318                     PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1319
1320    /* set up memory-mapped region for MSI-X */
1321
1322    memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1323                       ROCKER_PCI_MSIX_BAR_SIZE);
1324    pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1325                     PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1326
1327    /* MSI-X init */
1328
1329    err = rocker_msix_init(r, errp);
1330    if (err) {
1331        goto err_msix_init;
1332    }
1333
1334    /* validate switch properties */
1335
1336    if (!r->name) {
1337        r->name = g_strdup(TYPE_ROCKER);
1338    }
1339
1340    if (rocker_find(r->name)) {
1341        error_setg(errp, "%s already exists", r->name);
1342        goto err_duplicate;
1343    }
1344
1345    /* Rocker name is passed in port name requests to OS with the intention
1346     * that the name is used in interface names. Limit the length of the
1347     * rocker name to avoid naming problems in the OS. Also, adding the
1348     * port number as p# and unganged breakout b#, where # is at most 2
1349     * digits, so leave room for it too (-1 for string terminator, -3 for
1350     * p# and -3 for b#)
1351     */
1352#define ROCKER_IFNAMSIZ 16
1353#define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1354    if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1355        error_setg(errp,
1356                "name too long; please shorten to at most %d chars",
1357                MAX_ROCKER_NAME_LEN);
1358        goto err_name_too_long;
1359    }
1360
1361    if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1362        memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1363        r->fp_start_macaddr.a[4] += (sw_index++);
1364    }
1365
1366    if (!r->switch_id) {
1367        memcpy(&r->switch_id, &r->fp_start_macaddr,
1368               sizeof(r->fp_start_macaddr));
1369    }
1370
1371    if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1372        r->fp_ports = ROCKER_FP_PORTS_MAX;
1373    }
1374
1375    r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
1376
1377    /* Rings are ordered like this:
1378     * - command ring
1379     * - event ring
1380     * - port0 tx ring
1381     * - port0 rx ring
1382     * - port1 tx ring
1383     * - port1 rx ring
1384     * .....
1385     */
1386
1387    for (i = 0; i < rocker_pci_ring_count(r); i++) {
1388        DescRing *ring = desc_ring_alloc(r, i);
1389
1390        if (i == ROCKER_RING_CMD) {
1391            desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1392        } else if (i == ROCKER_RING_EVENT) {
1393            desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1394        } else if (i % 2 == 0) {
1395            desc_ring_set_consume(ring, tx_consume,
1396                                  ROCKER_MSIX_VEC_TX((i - 2) / 2));
1397        } else if (i % 2 == 1) {
1398            desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1399        }
1400
1401        r->rings[i] = ring;
1402    }
1403
1404    for (i = 0; i < r->fp_ports; i++) {
1405        FpPort *port =
1406            fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1407                          i, &r->fp_ports_peers[i]);
1408
1409        r->fp_port[i] = port;
1410        fp_port_set_world(port, r->world_dflt);
1411    }
1412
1413    QLIST_INSERT_HEAD(&rockers, r, next);
1414
1415    return;
1416
1417err_name_too_long:
1418err_duplicate:
1419    rocker_msix_uninit(r);
1420err_msix_init:
1421    object_unparent(OBJECT(&r->msix_bar));
1422    object_unparent(OBJECT(&r->mmio));
1423err_world_type_by_name:
1424    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1425        if (r->worlds[i]) {
1426            world_free(r->worlds[i]);
1427        }
1428    }
1429}
1430
1431static void pci_rocker_uninit(PCIDevice *dev)
1432{
1433    Rocker *r = ROCKER(dev);
1434    int i;
1435
1436    QLIST_REMOVE(r, next);
1437
1438    for (i = 0; i < r->fp_ports; i++) {
1439        FpPort *port = r->fp_port[i];
1440
1441        fp_port_free(port);
1442        r->fp_port[i] = NULL;
1443    }
1444
1445    for (i = 0; i < rocker_pci_ring_count(r); i++) {
1446        if (r->rings[i]) {
1447            desc_ring_free(r->rings[i]);
1448        }
1449    }
1450    g_free(r->rings);
1451
1452    rocker_msix_uninit(r);
1453    object_unparent(OBJECT(&r->msix_bar));
1454    object_unparent(OBJECT(&r->mmio));
1455
1456    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1457        if (r->worlds[i]) {
1458            world_free(r->worlds[i]);
1459        }
1460    }
1461    g_free(r->fp_ports_peers);
1462}
1463
1464static void rocker_reset(DeviceState *dev)
1465{
1466    Rocker *r = ROCKER(dev);
1467    int i;
1468
1469    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1470        if (r->worlds[i]) {
1471            world_reset(r->worlds[i]);
1472        }
1473    }
1474    for (i = 0; i < r->fp_ports; i++) {
1475        fp_port_reset(r->fp_port[i]);
1476        fp_port_set_world(r->fp_port[i], r->world_dflt);
1477    }
1478
1479    r->test_reg = 0;
1480    r->test_reg64 = 0;
1481    r->test_dma_addr = 0;
1482    r->test_dma_size = 0;
1483
1484    for (i = 0; i < rocker_pci_ring_count(r); i++) {
1485        desc_ring_reset(r->rings[i]);
1486    }
1487
1488    DPRINTF("Reset done\n");
1489}
1490
1491static Property rocker_properties[] = {
1492    DEFINE_PROP_STRING("name", Rocker, name),
1493    DEFINE_PROP_STRING("world", Rocker, world_name),
1494    DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
1495                        fp_start_macaddr),
1496    DEFINE_PROP_UINT64("switch_id", Rocker,
1497                       switch_id, 0),
1498    DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
1499                      fp_ports_peers, qdev_prop_netdev, NICPeers),
1500    DEFINE_PROP_END_OF_LIST(),
1501};
1502
1503static const VMStateDescription rocker_vmsd = {
1504    .name = TYPE_ROCKER,
1505    .unmigratable = 1,
1506};
1507
1508static void rocker_class_init(ObjectClass *klass, void *data)
1509{
1510    DeviceClass *dc = DEVICE_CLASS(klass);
1511    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1512
1513    k->realize = pci_rocker_realize;
1514    k->exit = pci_rocker_uninit;
1515    k->vendor_id = PCI_VENDOR_ID_REDHAT;
1516    k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1517    k->revision = ROCKER_PCI_REVISION;
1518    k->class_id = PCI_CLASS_NETWORK_OTHER;
1519    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1520    dc->desc = "Rocker Switch";
1521    dc->reset = rocker_reset;
1522    dc->props = rocker_properties;
1523    dc->vmsd = &rocker_vmsd;
1524}
1525
1526static const TypeInfo rocker_info = {
1527    .name          = TYPE_ROCKER,
1528    .parent        = TYPE_PCI_DEVICE,
1529    .instance_size = sizeof(Rocker),
1530    .class_init    = rocker_class_init,
1531    .interfaces = (InterfaceInfo[]) {
1532        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1533        { },
1534    },
1535};
1536
1537static void rocker_register_types(void)
1538{
1539    type_register_static(&rocker_info);
1540}
1541
1542type_init(rocker_register_types)
1543