qemu/hw/net/rocker/rocker.c
<<
>>
Prefs
   1/*
   2 * QEMU rocker switch emulation - PCI device
   3 *
   4 * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
   5 * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License as published by
   9 * the Free Software Foundation; either version 2 of the License, or
  10 * (at your option) any later version.
  11 *
  12 * This program is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 * GNU General Public License for more details.
  16 */
  17
  18#include "qemu/osdep.h"
  19#include "hw/hw.h"
  20#include "hw/pci/pci.h"
  21#include "hw/pci/msix.h"
  22#include "net/net.h"
  23#include "net/eth.h"
  24#include "qapi/error.h"
  25#include "qapi/qapi-commands-rocker.h"
  26#include "qemu/iov.h"
  27#include "qemu/module.h"
  28#include "qemu/bitops.h"
  29
  30#include "rocker.h"
  31#include "rocker_hw.h"
  32#include "rocker_fp.h"
  33#include "rocker_desc.h"
  34#include "rocker_tlv.h"
  35#include "rocker_world.h"
  36#include "rocker_of_dpa.h"
  37
  38struct rocker {
  39    /* private */
  40    PCIDevice parent_obj;
  41    /* public */
  42
  43    MemoryRegion mmio;
  44    MemoryRegion msix_bar;
  45
  46    /* switch configuration */
  47    char *name;                  /* switch name */
  48    char *world_name;            /* world name */
  49    uint32_t fp_ports;           /* front-panel port count */
  50    NICPeers *fp_ports_peers;
  51    MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
  52    uint64_t switch_id;          /* switch id */
  53
  54    /* front-panel ports */
  55    FpPort *fp_port[ROCKER_FP_PORTS_MAX];
  56
  57    /* register backings */
  58    uint32_t test_reg;
  59    uint64_t test_reg64;
  60    dma_addr_t test_dma_addr;
  61    uint32_t test_dma_size;
  62    uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */
  63
  64    /* desc rings */
  65    DescRing **rings;
  66
  67    /* switch worlds */
  68    World *worlds[ROCKER_WORLD_TYPE_MAX];
  69    World *world_dflt;
  70
  71    QLIST_ENTRY(rocker) next;
  72};
  73
  74#define TYPE_ROCKER "rocker"
  75
  76#define ROCKER(obj) \
  77    OBJECT_CHECK(Rocker, (obj), TYPE_ROCKER)
  78
  79static QLIST_HEAD(, rocker) rockers;
  80
  81Rocker *rocker_find(const char *name)
  82{
  83    Rocker *r;
  84
  85    QLIST_FOREACH(r, &rockers, next)
  86        if (strcmp(r->name, name) == 0) {
  87            return r;
  88        }
  89
  90    return NULL;
  91}
  92
  93World *rocker_get_world(Rocker *r, enum rocker_world_type type)
  94{
  95    if (type < ROCKER_WORLD_TYPE_MAX) {
  96        return r->worlds[type];
  97    }
  98    return NULL;
  99}
 100
 101RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
 102{
 103    RockerSwitch *rocker;
 104    Rocker *r;
 105
 106    r = rocker_find(name);
 107    if (!r) {
 108        error_setg(errp, "rocker %s not found", name);
 109        return NULL;
 110    }
 111
 112    rocker = g_new0(RockerSwitch, 1);
 113    rocker->name = g_strdup(r->name);
 114    rocker->id = r->switch_id;
 115    rocker->ports = r->fp_ports;
 116
 117    return rocker;
 118}
 119
 120RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
 121{
 122    RockerPortList *list = NULL;
 123    Rocker *r;
 124    int i;
 125
 126    r = rocker_find(name);
 127    if (!r) {
 128        error_setg(errp, "rocker %s not found", name);
 129        return NULL;
 130    }
 131
 132    for (i = r->fp_ports - 1; i >= 0; i--) {
 133        RockerPortList *info = g_malloc0(sizeof(*info));
 134        info->value = g_malloc0(sizeof(*info->value));
 135        struct fp_port *port = r->fp_port[i];
 136
 137        fp_port_get_info(port, info);
 138        info->next = list;
 139        list = info;
 140    }
 141
 142    return list;
 143}
 144
 145uint32_t rocker_fp_ports(Rocker *r)
 146{
 147    return r->fp_ports;
 148}
 149
 150static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
 151                                            DescRing *ring)
 152{
 153    return (desc_ring_index(ring) - 2) / 2 + 1;
 154}
 155
 156static int tx_consume(Rocker *r, DescInfo *info)
 157{
 158    PCIDevice *dev = PCI_DEVICE(r);
 159    char *buf = desc_get_buf(info, true);
 160    RockerTlv *tlv_frag;
 161    RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
 162    struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
 163    uint32_t pport;
 164    uint32_t port;
 165    uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
 166    uint16_t tx_l3_csum_off = 0;
 167    uint16_t tx_tso_mss = 0;
 168    uint16_t tx_tso_hdr_len = 0;
 169    int iovcnt = 0;
 170    int err = ROCKER_OK;
 171    int rem;
 172    int i;
 173
 174    if (!buf) {
 175        return -ROCKER_ENXIO;
 176    }
 177
 178    rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
 179
 180    if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
 181        return -ROCKER_EINVAL;
 182    }
 183
 184    pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
 185    if (!fp_port_from_pport(pport, &port)) {
 186        return -ROCKER_EINVAL;
 187    }
 188
 189    if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
 190        tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
 191    }
 192
 193    switch (tx_offload) {
 194    case ROCKER_TX_OFFLOAD_L3_CSUM:
 195        if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
 196            return -ROCKER_EINVAL;
 197        }
 198        break;
 199    case ROCKER_TX_OFFLOAD_TSO:
 200        if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
 201            !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
 202            return -ROCKER_EINVAL;
 203        }
 204        break;
 205    }
 206
 207    if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
 208        tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
 209    }
 210
 211    if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
 212        tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
 213    }
 214
 215    if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
 216        tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
 217    }
 218
 219    rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
 220        hwaddr frag_addr;
 221        uint16_t frag_len;
 222
 223        if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
 224            err = -ROCKER_EINVAL;
 225            goto err_bad_attr;
 226        }
 227
 228        rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
 229
 230        if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
 231            !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
 232            err = -ROCKER_EINVAL;
 233            goto err_bad_attr;
 234        }
 235
 236        frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
 237        frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
 238
 239        if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
 240            goto err_too_many_frags;
 241        }
 242        iov[iovcnt].iov_len = frag_len;
 243        iov[iovcnt].iov_base = g_malloc(frag_len);
 244
 245        pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
 246                     iov[iovcnt].iov_len);
 247
 248        iovcnt++;
 249    }
 250
 251    if (iovcnt) {
 252        /* XXX perform Tx offloads */
 253        /* XXX   silence compiler for now */
 254        tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
 255    }
 256
 257    err = fp_port_eg(r->fp_port[port], iov, iovcnt);
 258
 259err_too_many_frags:
 260err_bad_attr:
 261    for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
 262        g_free(iov[i].iov_base);
 263    }
 264
 265    return err;
 266}
 267
 268static int cmd_get_port_settings(Rocker *r,
 269                                 DescInfo *info, char *buf,
 270                                 RockerTlv *cmd_info_tlv)
 271{
 272    RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
 273    RockerTlv *nest;
 274    FpPort *fp_port;
 275    uint32_t pport;
 276    uint32_t port;
 277    uint32_t speed;
 278    uint8_t duplex;
 279    uint8_t autoneg;
 280    uint8_t learning;
 281    char *phys_name;
 282    MACAddr macaddr;
 283    enum rocker_world_type mode;
 284    size_t tlv_size;
 285    int pos;
 286    int err;
 287
 288    rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
 289                            cmd_info_tlv);
 290
 291    if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
 292        return -ROCKER_EINVAL;
 293    }
 294
 295    pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
 296    if (!fp_port_from_pport(pport, &port)) {
 297        return -ROCKER_EINVAL;
 298    }
 299    fp_port = r->fp_port[port];
 300
 301    err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
 302    if (err) {
 303        return err;
 304    }
 305
 306    fp_port_get_macaddr(fp_port, &macaddr);
 307    mode = world_type(fp_port_get_world(fp_port));
 308    learning = fp_port_get_learning(fp_port);
 309    phys_name = fp_port_get_name(fp_port);
 310
 311    tlv_size = rocker_tlv_total_size(0) +                 /* nest */
 312               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 313               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
 314               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
 315               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
 316               rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
 317               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
 318               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
 319               rocker_tlv_total_size(strlen(phys_name));
 320
 321    if (tlv_size > desc_buf_size(info)) {
 322        return -ROCKER_EMSGSIZE;
 323    }
 324
 325    pos = 0;
 326    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
 327    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
 328    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
 329    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
 330    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
 331    rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
 332                   sizeof(macaddr.a), macaddr.a);
 333    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
 334    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
 335                      learning);
 336    rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
 337                   strlen(phys_name), phys_name);
 338    rocker_tlv_nest_end(buf, &pos, nest);
 339
 340    return desc_set_buf(info, tlv_size);
 341}
 342
 343static int cmd_set_port_settings(Rocker *r,
 344                                 RockerTlv *cmd_info_tlv)
 345{
 346    RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
 347    FpPort *fp_port;
 348    uint32_t pport;
 349    uint32_t port;
 350    uint32_t speed;
 351    uint8_t duplex;
 352    uint8_t autoneg;
 353    uint8_t learning;
 354    MACAddr macaddr;
 355    enum rocker_world_type mode;
 356    int err;
 357
 358    rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
 359                            cmd_info_tlv);
 360
 361    if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
 362        return -ROCKER_EINVAL;
 363    }
 364
 365    pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
 366    if (!fp_port_from_pport(pport, &port)) {
 367        return -ROCKER_EINVAL;
 368    }
 369    fp_port = r->fp_port[port];
 370
 371    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
 372        tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
 373        tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
 374
 375        speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
 376        duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
 377        autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
 378
 379        err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
 380        if (err) {
 381            return err;
 382        }
 383    }
 384
 385    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
 386        if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
 387            sizeof(macaddr.a)) {
 388            return -ROCKER_EINVAL;
 389        }
 390        memcpy(macaddr.a,
 391               rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
 392               sizeof(macaddr.a));
 393        fp_port_set_macaddr(fp_port, &macaddr);
 394    }
 395
 396    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
 397        mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
 398        if (mode >= ROCKER_WORLD_TYPE_MAX) {
 399            return -ROCKER_EINVAL;
 400        }
 401        /* We don't support world change. */
 402        if (!fp_port_check_world(fp_port, r->worlds[mode])) {
 403            return -ROCKER_EINVAL;
 404        }
 405    }
 406
 407    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
 408        learning =
 409            rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
 410        fp_port_set_learning(fp_port, learning);
 411    }
 412
 413    return ROCKER_OK;
 414}
 415
 416static int cmd_consume(Rocker *r, DescInfo *info)
 417{
 418    char *buf = desc_get_buf(info, false);
 419    RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
 420    RockerTlv *info_tlv;
 421    World *world;
 422    uint16_t cmd;
 423    int err;
 424
 425    if (!buf) {
 426        return -ROCKER_ENXIO;
 427    }
 428
 429    rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
 430
 431    if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
 432        return -ROCKER_EINVAL;
 433    }
 434
 435    cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
 436    info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
 437
 438    /* This might be reworked to something like this:
 439     * Every world will have an array of command handlers from
 440     * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is
 441     * up to each world to implement whatever command it want.
 442     * It can reference "generic" commands as cmd_set_port_settings or
 443     * cmd_get_port_settings
 444     */
 445
 446    switch (cmd) {
 447    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
 448    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
 449    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
 450    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
 451    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
 452    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
 453    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
 454    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
 455        world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
 456        err = world_do_cmd(world, info, buf, cmd, info_tlv);
 457        break;
 458    case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
 459        err = cmd_get_port_settings(r, info, buf, info_tlv);
 460        break;
 461    case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
 462        err = cmd_set_port_settings(r, info_tlv);
 463        break;
 464    default:
 465        err = -ROCKER_EINVAL;
 466        break;
 467    }
 468
 469    return err;
 470}
 471
 472static void rocker_msix_irq(Rocker *r, unsigned vector)
 473{
 474    PCIDevice *dev = PCI_DEVICE(r);
 475
 476    DPRINTF("MSI-X notify request for vector %d\n", vector);
 477    if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
 478        DPRINTF("incorrect vector %d\n", vector);
 479        return;
 480    }
 481    msix_notify(dev, vector);
 482}
 483
 484int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
 485{
 486    DescRing *ring = r->rings[ROCKER_RING_EVENT];
 487    DescInfo *info = desc_ring_fetch_desc(ring);
 488    RockerTlv *nest;
 489    char *buf;
 490    size_t tlv_size;
 491    int pos;
 492    int err;
 493
 494    if (!info) {
 495        return -ROCKER_ENOBUFS;
 496    }
 497
 498    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
 499               rocker_tlv_total_size(0) +                 /* nest */
 500               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 501               rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
 502
 503    if (tlv_size > desc_buf_size(info)) {
 504        err = -ROCKER_EMSGSIZE;
 505        goto err_too_big;
 506    }
 507
 508    buf = desc_get_buf(info, false);
 509    if (!buf) {
 510        err = -ROCKER_ENOMEM;
 511        goto err_no_mem;
 512    }
 513
 514    pos = 0;
 515    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
 516                        ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
 517    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
 518    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
 519    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
 520                      link_up ? 1 : 0);
 521    rocker_tlv_nest_end(buf, &pos, nest);
 522
 523    err = desc_set_buf(info, tlv_size);
 524
 525err_too_big:
 526err_no_mem:
 527    if (desc_ring_post_desc(ring, err)) {
 528        rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
 529    }
 530
 531    return err;
 532}
 533
 534int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
 535                               uint16_t vlan_id)
 536{
 537    DescRing *ring = r->rings[ROCKER_RING_EVENT];
 538    DescInfo *info;
 539    FpPort *fp_port;
 540    uint32_t port;
 541    RockerTlv *nest;
 542    char *buf;
 543    size_t tlv_size;
 544    int pos;
 545    int err;
 546
 547    if (!fp_port_from_pport(pport, &port)) {
 548        return -ROCKER_EINVAL;
 549    }
 550    fp_port = r->fp_port[port];
 551    if (!fp_port_get_learning(fp_port)) {
 552        return ROCKER_OK;
 553    }
 554
 555    info = desc_ring_fetch_desc(ring);
 556    if (!info) {
 557        return -ROCKER_ENOBUFS;
 558    }
 559
 560    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
 561               rocker_tlv_total_size(0) +                 /* nest */
 562               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 563               rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
 564               rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
 565
 566    if (tlv_size > desc_buf_size(info)) {
 567        err = -ROCKER_EMSGSIZE;
 568        goto err_too_big;
 569    }
 570
 571    buf = desc_get_buf(info, false);
 572    if (!buf) {
 573        err = -ROCKER_ENOMEM;
 574        goto err_no_mem;
 575    }
 576
 577    pos = 0;
 578    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
 579                        ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
 580    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
 581    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
 582    rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
 583    rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
 584    rocker_tlv_nest_end(buf, &pos, nest);
 585
 586    err = desc_set_buf(info, tlv_size);
 587
 588err_too_big:
 589err_no_mem:
 590    if (desc_ring_post_desc(ring, err)) {
 591        rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
 592    }
 593
 594    return err;
 595}
 596
 597static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
 598                                                     uint32_t pport)
 599{
 600    return r->rings[(pport - 1) * 2 + 3];
 601}
 602
 603int rx_produce(World *world, uint32_t pport,
 604               const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
 605{
 606    Rocker *r = world_rocker(world);
 607    PCIDevice *dev = (PCIDevice *)r;
 608    DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
 609    DescInfo *info = desc_ring_fetch_desc(ring);
 610    char *data;
 611    size_t data_size = iov_size(iov, iovcnt);
 612    char *buf;
 613    uint16_t rx_flags = 0;
 614    uint16_t rx_csum = 0;
 615    size_t tlv_size;
 616    RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
 617    hwaddr frag_addr;
 618    uint16_t frag_max_len;
 619    int pos;
 620    int err;
 621
 622    if (!info) {
 623        return -ROCKER_ENOBUFS;
 624    }
 625
 626    buf = desc_get_buf(info, false);
 627    if (!buf) {
 628        err = -ROCKER_ENXIO;
 629        goto out;
 630    }
 631    rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
 632
 633    if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
 634        !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
 635        err = -ROCKER_EINVAL;
 636        goto out;
 637    }
 638
 639    frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
 640    frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
 641
 642    if (data_size > frag_max_len) {
 643        err = -ROCKER_EMSGSIZE;
 644        goto out;
 645    }
 646
 647    if (copy_to_cpu) {
 648        rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
 649    }
 650
 651    /* XXX calc rx flags/csum */
 652
 653    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
 654               rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */
 655               rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
 656               rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
 657               rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
 658
 659    if (tlv_size > desc_buf_size(info)) {
 660        err = -ROCKER_EMSGSIZE;
 661        goto out;
 662    }
 663
 664    /* TODO:
 665     * iov dma write can be optimized in similar way e1000 does it in
 666     * e1000_receive_iov. But maybe if would make sense to introduce
 667     * generic helper iov_dma_write.
 668     */
 669
 670    data = g_malloc(data_size);
 671
 672    iov_to_buf(iov, iovcnt, 0, data, data_size);
 673    pci_dma_write(dev, frag_addr, data, data_size);
 674    g_free(data);
 675
 676    pos = 0;
 677    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
 678    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
 679    rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
 680    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
 681    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
 682
 683    err = desc_set_buf(info, tlv_size);
 684
 685out:
 686    if (desc_ring_post_desc(ring, err)) {
 687        rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
 688    }
 689
 690    return err;
 691}
 692
 693int rocker_port_eg(Rocker *r, uint32_t pport,
 694                   const struct iovec *iov, int iovcnt)
 695{
 696    FpPort *fp_port;
 697    uint32_t port;
 698
 699    if (!fp_port_from_pport(pport, &port)) {
 700        return -ROCKER_EINVAL;
 701    }
 702
 703    fp_port = r->fp_port[port];
 704
 705    return fp_port_eg(fp_port, iov, iovcnt);
 706}
 707
 708static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
 709{
 710    PCIDevice *dev = PCI_DEVICE(r);
 711    char *buf;
 712    int i;
 713
 714    buf = g_malloc(r->test_dma_size);
 715
 716    switch (val) {
 717    case ROCKER_TEST_DMA_CTRL_CLEAR:
 718        memset(buf, 0, r->test_dma_size);
 719        break;
 720    case ROCKER_TEST_DMA_CTRL_FILL:
 721        memset(buf, 0x96, r->test_dma_size);
 722        break;
 723    case ROCKER_TEST_DMA_CTRL_INVERT:
 724        pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
 725        for (i = 0; i < r->test_dma_size; i++) {
 726            buf[i] = ~buf[i];
 727        }
 728        break;
 729    default:
 730        DPRINTF("not test dma control val=0x%08x\n", val);
 731        goto err_out;
 732    }
 733    pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
 734
 735    rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
 736
 737err_out:
 738    g_free(buf);
 739}
 740
 741static void rocker_reset(DeviceState *dev);
 742
 743static void rocker_control(Rocker *r, uint32_t val)
 744{
 745    if (val & ROCKER_CONTROL_RESET) {
 746        rocker_reset(DEVICE(r));
 747    }
 748}
 749
 750static int rocker_pci_ring_count(Rocker *r)
 751{
 752    /* There are:
 753     * - command ring
 754     * - event ring
 755     * - tx and rx ring per each port
 756     */
 757    return 2 + (2 * r->fp_ports);
 758}
 759
 760static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
 761{
 762    hwaddr start = ROCKER_DMA_DESC_BASE;
 763    hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
 764
 765    return addr >= start && addr < end;
 766}
 767
 768static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
 769{
 770    int i;
 771    bool old_enabled;
 772    bool new_enabled;
 773    FpPort *fp_port;
 774
 775    for (i = 0; i < r->fp_ports; i++) {
 776        fp_port = r->fp_port[i];
 777        old_enabled = fp_port_enabled(fp_port);
 778        new_enabled = (new >> (i + 1)) & 0x1;
 779        if (new_enabled == old_enabled) {
 780            continue;
 781        }
 782        if (new_enabled) {
 783            fp_port_enable(r->fp_port[i]);
 784        } else {
 785            fp_port_disable(r->fp_port[i]);
 786        }
 787    }
 788}
 789
 790static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
 791{
 792    Rocker *r = opaque;
 793
 794    if (rocker_addr_is_desc_reg(r, addr)) {
 795        unsigned index = ROCKER_RING_INDEX(addr);
 796        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 797
 798        switch (offset) {
 799        case ROCKER_DMA_DESC_ADDR_OFFSET:
 800            r->lower32 = (uint64_t)val;
 801            break;
 802        case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
 803            desc_ring_set_base_addr(r->rings[index],
 804                                    ((uint64_t)val) << 32 | r->lower32);
 805            r->lower32 = 0;
 806            break;
 807        case ROCKER_DMA_DESC_SIZE_OFFSET:
 808            desc_ring_set_size(r->rings[index], val);
 809            break;
 810        case ROCKER_DMA_DESC_HEAD_OFFSET:
 811            if (desc_ring_set_head(r->rings[index], val)) {
 812                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
 813            }
 814            break;
 815        case ROCKER_DMA_DESC_CTRL_OFFSET:
 816            desc_ring_set_ctrl(r->rings[index], val);
 817            break;
 818        case ROCKER_DMA_DESC_CREDITS_OFFSET:
 819            if (desc_ring_ret_credits(r->rings[index], val)) {
 820                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
 821            }
 822            break;
 823        default:
 824            DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
 825                    " val=0x%08x (ring %d, addr=0x%02x)\n",
 826                    addr, val, index, offset);
 827            break;
 828        }
 829        return;
 830    }
 831
 832    switch (addr) {
 833    case ROCKER_TEST_REG:
 834        r->test_reg = val;
 835        break;
 836    case ROCKER_TEST_REG64:
 837    case ROCKER_TEST_DMA_ADDR:
 838    case ROCKER_PORT_PHYS_ENABLE:
 839        r->lower32 = (uint64_t)val;
 840        break;
 841    case ROCKER_TEST_REG64 + 4:
 842        r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
 843        r->lower32 = 0;
 844        break;
 845    case ROCKER_TEST_IRQ:
 846        rocker_msix_irq(r, val);
 847        break;
 848    case ROCKER_TEST_DMA_SIZE:
 849        r->test_dma_size = val & 0xFFFF;
 850        break;
 851    case ROCKER_TEST_DMA_ADDR + 4:
 852        r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
 853        r->lower32 = 0;
 854        break;
 855    case ROCKER_TEST_DMA_CTRL:
 856        rocker_test_dma_ctrl(r, val);
 857        break;
 858    case ROCKER_CONTROL:
 859        rocker_control(r, val);
 860        break;
 861    case ROCKER_PORT_PHYS_ENABLE + 4:
 862        rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
 863        r->lower32 = 0;
 864        break;
 865    default:
 866        DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
 867                " val=0x%08x\n", addr, val);
 868        break;
 869    }
 870}
 871
 872static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
 873{
 874    Rocker *r = opaque;
 875
 876    if (rocker_addr_is_desc_reg(r, addr)) {
 877        unsigned index = ROCKER_RING_INDEX(addr);
 878        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 879
 880        switch (offset) {
 881        case ROCKER_DMA_DESC_ADDR_OFFSET:
 882            desc_ring_set_base_addr(r->rings[index], val);
 883            break;
 884        default:
 885            DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
 886                    " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
 887                    addr, val, index, offset);
 888            break;
 889        }
 890        return;
 891    }
 892
 893    switch (addr) {
 894    case ROCKER_TEST_REG64:
 895        r->test_reg64 = val;
 896        break;
 897    case ROCKER_TEST_DMA_ADDR:
 898        r->test_dma_addr = val;
 899        break;
 900    case ROCKER_PORT_PHYS_ENABLE:
 901        rocker_port_phys_enable_write(r, val);
 902        break;
 903    default:
 904        DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
 905                " val=0x" TARGET_FMT_plx "\n", addr, val);
 906        break;
 907    }
 908}
 909
 910#ifdef DEBUG_ROCKER
 911#define regname(reg) case (reg): return #reg
 912static const char *rocker_reg_name(void *opaque, hwaddr addr)
 913{
 914    Rocker *r = opaque;
 915
 916    if (rocker_addr_is_desc_reg(r, addr)) {
 917        unsigned index = ROCKER_RING_INDEX(addr);
 918        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 919        static char buf[100];
 920        char ring_name[10];
 921
 922        switch (index) {
 923        case 0:
 924            sprintf(ring_name, "cmd");
 925            break;
 926        case 1:
 927            sprintf(ring_name, "event");
 928            break;
 929        default:
 930            sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
 931                    (index - 2) / 2);
 932        }
 933
 934        switch (offset) {
 935        case ROCKER_DMA_DESC_ADDR_OFFSET:
 936            sprintf(buf, "Ring[%s] ADDR", ring_name);
 937            return buf;
 938        case ROCKER_DMA_DESC_ADDR_OFFSET+4:
 939            sprintf(buf, "Ring[%s] ADDR+4", ring_name);
 940            return buf;
 941        case ROCKER_DMA_DESC_SIZE_OFFSET:
 942            sprintf(buf, "Ring[%s] SIZE", ring_name);
 943            return buf;
 944        case ROCKER_DMA_DESC_HEAD_OFFSET:
 945            sprintf(buf, "Ring[%s] HEAD", ring_name);
 946            return buf;
 947        case ROCKER_DMA_DESC_TAIL_OFFSET:
 948            sprintf(buf, "Ring[%s] TAIL", ring_name);
 949            return buf;
 950        case ROCKER_DMA_DESC_CTRL_OFFSET:
 951            sprintf(buf, "Ring[%s] CTRL", ring_name);
 952            return buf;
 953        case ROCKER_DMA_DESC_CREDITS_OFFSET:
 954            sprintf(buf, "Ring[%s] CREDITS", ring_name);
 955            return buf;
 956        default:
 957            sprintf(buf, "Ring[%s] ???", ring_name);
 958            return buf;
 959        }
 960    } else {
 961        switch (addr) {
 962            regname(ROCKER_BOGUS_REG0);
 963            regname(ROCKER_BOGUS_REG1);
 964            regname(ROCKER_BOGUS_REG2);
 965            regname(ROCKER_BOGUS_REG3);
 966            regname(ROCKER_TEST_REG);
 967            regname(ROCKER_TEST_REG64);
 968            regname(ROCKER_TEST_REG64+4);
 969            regname(ROCKER_TEST_IRQ);
 970            regname(ROCKER_TEST_DMA_ADDR);
 971            regname(ROCKER_TEST_DMA_ADDR+4);
 972            regname(ROCKER_TEST_DMA_SIZE);
 973            regname(ROCKER_TEST_DMA_CTRL);
 974            regname(ROCKER_CONTROL);
 975            regname(ROCKER_PORT_PHYS_COUNT);
 976            regname(ROCKER_PORT_PHYS_LINK_STATUS);
 977            regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
 978            regname(ROCKER_PORT_PHYS_ENABLE);
 979            regname(ROCKER_PORT_PHYS_ENABLE+4);
 980            regname(ROCKER_SWITCH_ID);
 981            regname(ROCKER_SWITCH_ID+4);
 982        }
 983    }
 984    return "???";
 985}
 986#else
 987static const char *rocker_reg_name(void *opaque, hwaddr addr)
 988{
 989    return NULL;
 990}
 991#endif
 992
 993static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
 994                              unsigned size)
 995{
 996    DPRINTF("Write %s addr " TARGET_FMT_plx
 997            ", size %u, val " TARGET_FMT_plx "\n",
 998            rocker_reg_name(opaque, addr), addr, size, val);
 999
1000    switch (size) {
1001    case 4:
1002        rocker_io_writel(opaque, addr, val);
1003        break;
1004    case 8:
1005        rocker_io_writeq(opaque, addr, val);
1006        break;
1007    }
1008}
1009
1010static uint64_t rocker_port_phys_link_status(Rocker *r)
1011{
1012    int i;
1013    uint64_t status = 0;
1014
1015    for (i = 0; i < r->fp_ports; i++) {
1016        FpPort *port = r->fp_port[i];
1017
1018        if (fp_port_get_link_up(port)) {
1019            status |= 1 << (i + 1);
1020        }
1021    }
1022    return status;
1023}
1024
1025static uint64_t rocker_port_phys_enable_read(Rocker *r)
1026{
1027    int i;
1028    uint64_t ret = 0;
1029
1030    for (i = 0; i < r->fp_ports; i++) {
1031        FpPort *port = r->fp_port[i];
1032
1033        if (fp_port_enabled(port)) {
1034            ret |= 1 << (i + 1);
1035        }
1036    }
1037    return ret;
1038}
1039
1040static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1041{
1042    Rocker *r = opaque;
1043    uint32_t ret;
1044
1045    if (rocker_addr_is_desc_reg(r, addr)) {
1046        unsigned index = ROCKER_RING_INDEX(addr);
1047        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1048
1049        switch (offset) {
1050        case ROCKER_DMA_DESC_ADDR_OFFSET:
1051            ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1052            break;
1053        case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1054            ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1055            break;
1056        case ROCKER_DMA_DESC_SIZE_OFFSET:
1057            ret = desc_ring_get_size(r->rings[index]);
1058            break;
1059        case ROCKER_DMA_DESC_HEAD_OFFSET:
1060            ret = desc_ring_get_head(r->rings[index]);
1061            break;
1062        case ROCKER_DMA_DESC_TAIL_OFFSET:
1063            ret = desc_ring_get_tail(r->rings[index]);
1064            break;
1065        case ROCKER_DMA_DESC_CREDITS_OFFSET:
1066            ret = desc_ring_get_credits(r->rings[index]);
1067            break;
1068        default:
1069            DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
1070                    " (ring %d, addr=0x%02x)\n", addr, index, offset);
1071            ret = 0;
1072            break;
1073        }
1074        return ret;
1075    }
1076
1077    switch (addr) {
1078    case ROCKER_BOGUS_REG0:
1079    case ROCKER_BOGUS_REG1:
1080    case ROCKER_BOGUS_REG2:
1081    case ROCKER_BOGUS_REG3:
1082        ret = 0xDEADBABE;
1083        break;
1084    case ROCKER_TEST_REG:
1085        ret = r->test_reg * 2;
1086        break;
1087    case ROCKER_TEST_REG64:
1088        ret = (uint32_t)(r->test_reg64 * 2);
1089        break;
1090    case ROCKER_TEST_REG64 + 4:
1091        ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1092        break;
1093    case ROCKER_TEST_DMA_SIZE:
1094        ret = r->test_dma_size;
1095        break;
1096    case ROCKER_TEST_DMA_ADDR:
1097        ret = (uint32_t)r->test_dma_addr;
1098        break;
1099    case ROCKER_TEST_DMA_ADDR + 4:
1100        ret = (uint32_t)(r->test_dma_addr >> 32);
1101        break;
1102    case ROCKER_PORT_PHYS_COUNT:
1103        ret = r->fp_ports;
1104        break;
1105    case ROCKER_PORT_PHYS_LINK_STATUS:
1106        ret = (uint32_t)rocker_port_phys_link_status(r);
1107        break;
1108    case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1109        ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1110        break;
1111    case ROCKER_PORT_PHYS_ENABLE:
1112        ret = (uint32_t)rocker_port_phys_enable_read(r);
1113        break;
1114    case ROCKER_PORT_PHYS_ENABLE + 4:
1115        ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1116        break;
1117    case ROCKER_SWITCH_ID:
1118        ret = (uint32_t)r->switch_id;
1119        break;
1120    case ROCKER_SWITCH_ID + 4:
1121        ret = (uint32_t)(r->switch_id >> 32);
1122        break;
1123    default:
1124        DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1125        ret = 0;
1126        break;
1127    }
1128    return ret;
1129}
1130
1131static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1132{
1133    Rocker *r = opaque;
1134    uint64_t ret;
1135
1136    if (rocker_addr_is_desc_reg(r, addr)) {
1137        unsigned index = ROCKER_RING_INDEX(addr);
1138        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1139
1140        switch (addr & ROCKER_DMA_DESC_MASK) {
1141        case ROCKER_DMA_DESC_ADDR_OFFSET:
1142            ret = desc_ring_get_base_addr(r->rings[index]);
1143            break;
1144        default:
1145            DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1146                    " (ring %d, addr=0x%02x)\n", addr, index, offset);
1147            ret = 0;
1148            break;
1149        }
1150        return ret;
1151    }
1152
1153    switch (addr) {
1154    case ROCKER_BOGUS_REG0:
1155    case ROCKER_BOGUS_REG2:
1156        ret = 0xDEADBABEDEADBABEULL;
1157        break;
1158    case ROCKER_TEST_REG64:
1159        ret = r->test_reg64 * 2;
1160        break;
1161    case ROCKER_TEST_DMA_ADDR:
1162        ret = r->test_dma_addr;
1163        break;
1164    case ROCKER_PORT_PHYS_LINK_STATUS:
1165        ret = rocker_port_phys_link_status(r);
1166        break;
1167    case ROCKER_PORT_PHYS_ENABLE:
1168        ret = rocker_port_phys_enable_read(r);
1169        break;
1170    case ROCKER_SWITCH_ID:
1171        ret = r->switch_id;
1172        break;
1173    default:
1174        DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1175        ret = 0;
1176        break;
1177    }
1178    return ret;
1179}
1180
1181static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1182{
1183    DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1184            rocker_reg_name(opaque, addr), addr, size);
1185
1186    switch (size) {
1187    case 4:
1188        return rocker_io_readl(opaque, addr);
1189    case 8:
1190        return rocker_io_readq(opaque, addr);
1191    }
1192
1193    return -1;
1194}
1195
1196static const MemoryRegionOps rocker_mmio_ops = {
1197    .read = rocker_mmio_read,
1198    .write = rocker_mmio_write,
1199    .endianness = DEVICE_LITTLE_ENDIAN,
1200    .valid = {
1201        .min_access_size = 4,
1202        .max_access_size = 8,
1203    },
1204    .impl = {
1205        .min_access_size = 4,
1206        .max_access_size = 8,
1207    },
1208};
1209
1210static void rocker_msix_vectors_unuse(Rocker *r,
1211                                      unsigned int num_vectors)
1212{
1213    PCIDevice *dev = PCI_DEVICE(r);
1214    int i;
1215
1216    for (i = 0; i < num_vectors; i++) {
1217        msix_vector_unuse(dev, i);
1218    }
1219}
1220
1221static int rocker_msix_vectors_use(Rocker *r,
1222                                   unsigned int num_vectors)
1223{
1224    PCIDevice *dev = PCI_DEVICE(r);
1225    int err;
1226    int i;
1227
1228    for (i = 0; i < num_vectors; i++) {
1229        err = msix_vector_use(dev, i);
1230        if (err) {
1231            goto rollback;
1232        }
1233    }
1234    return 0;
1235
1236rollback:
1237    rocker_msix_vectors_unuse(r, i);
1238    return err;
1239}
1240
1241static int rocker_msix_init(Rocker *r, Error **errp)
1242{
1243    PCIDevice *dev = PCI_DEVICE(r);
1244    int err;
1245
1246    err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1247                    &r->msix_bar,
1248                    ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1249                    &r->msix_bar,
1250                    ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1251                    0, errp);
1252    if (err) {
1253        return err;
1254    }
1255
1256    err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1257    if (err) {
1258        goto err_msix_vectors_use;
1259    }
1260
1261    return 0;
1262
1263err_msix_vectors_use:
1264    msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1265    return err;
1266}
1267
1268static void rocker_msix_uninit(Rocker *r)
1269{
1270    PCIDevice *dev = PCI_DEVICE(r);
1271
1272    msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1273    rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1274}
1275
1276static World *rocker_world_type_by_name(Rocker *r, const char *name)
1277{
1278    int i;
1279
1280    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1281        if (strcmp(name, world_name(r->worlds[i])) == 0) {
1282            return r->worlds[i];
1283        }
1284    }
1285    return NULL;
1286}
1287
1288static void pci_rocker_realize(PCIDevice *dev, Error **errp)
1289{
1290    Rocker *r = ROCKER(dev);
1291    const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1292    const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1293    static int sw_index;
1294    int i, err = 0;
1295
1296    /* allocate worlds */
1297
1298    r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1299
1300    if (!r->world_name) {
1301        r->world_name = g_strdup(world_name(r->worlds[ROCKER_WORLD_TYPE_OF_DPA]));
1302    }
1303
1304    r->world_dflt = rocker_world_type_by_name(r, r->world_name);
1305    if (!r->world_dflt) {
1306        error_setg(errp,
1307                "invalid argument requested world %s does not exist",
1308                r->world_name);
1309        goto err_world_type_by_name;
1310    }
1311
1312    /* set up memory-mapped region at BAR0 */
1313
1314    memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1315                          "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1316    pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1317                     PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1318
1319    /* set up memory-mapped region for MSI-X */
1320
1321    memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1322                       ROCKER_PCI_MSIX_BAR_SIZE);
1323    pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1324                     PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1325
1326    /* MSI-X init */
1327
1328    err = rocker_msix_init(r, errp);
1329    if (err) {
1330        goto err_msix_init;
1331    }
1332
1333    /* validate switch properties */
1334
1335    if (!r->name) {
1336        r->name = g_strdup(TYPE_ROCKER);
1337    }
1338
1339    if (rocker_find(r->name)) {
1340        error_setg(errp, "%s already exists", r->name);
1341        goto err_duplicate;
1342    }
1343
1344    /* Rocker name is passed in port name requests to OS with the intention
1345     * that the name is used in interface names. Limit the length of the
1346     * rocker name to avoid naming problems in the OS. Also, adding the
1347     * port number as p# and unganged breakout b#, where # is at most 2
1348     * digits, so leave room for it too (-1 for string terminator, -3 for
1349     * p# and -3 for b#)
1350     */
1351#define ROCKER_IFNAMSIZ 16
1352#define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1353    if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1354        error_setg(errp,
1355                "name too long; please shorten to at most %d chars",
1356                MAX_ROCKER_NAME_LEN);
1357        goto err_name_too_long;
1358    }
1359
1360    if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1361        memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1362        r->fp_start_macaddr.a[4] += (sw_index++);
1363    }
1364
1365    if (!r->switch_id) {
1366        memcpy(&r->switch_id, &r->fp_start_macaddr,
1367               sizeof(r->fp_start_macaddr));
1368    }
1369
1370    if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1371        r->fp_ports = ROCKER_FP_PORTS_MAX;
1372    }
1373
1374    r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
1375
1376    /* Rings are ordered like this:
1377     * - command ring
1378     * - event ring
1379     * - port0 tx ring
1380     * - port0 rx ring
1381     * - port1 tx ring
1382     * - port1 rx ring
1383     * .....
1384     */
1385
1386    for (i = 0; i < rocker_pci_ring_count(r); i++) {
1387        DescRing *ring = desc_ring_alloc(r, i);
1388
1389        if (i == ROCKER_RING_CMD) {
1390            desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1391        } else if (i == ROCKER_RING_EVENT) {
1392            desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1393        } else if (i % 2 == 0) {
1394            desc_ring_set_consume(ring, tx_consume,
1395                                  ROCKER_MSIX_VEC_TX((i - 2) / 2));
1396        } else if (i % 2 == 1) {
1397            desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1398        }
1399
1400        r->rings[i] = ring;
1401    }
1402
1403    for (i = 0; i < r->fp_ports; i++) {
1404        FpPort *port =
1405            fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1406                          i, &r->fp_ports_peers[i]);
1407
1408        r->fp_port[i] = port;
1409        fp_port_set_world(port, r->world_dflt);
1410    }
1411
1412    QLIST_INSERT_HEAD(&rockers, r, next);
1413
1414    return;
1415
1416err_name_too_long:
1417err_duplicate:
1418    rocker_msix_uninit(r);
1419err_msix_init:
1420    object_unparent(OBJECT(&r->msix_bar));
1421    object_unparent(OBJECT(&r->mmio));
1422err_world_type_by_name:
1423    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1424        if (r->worlds[i]) {
1425            world_free(r->worlds[i]);
1426        }
1427    }
1428}
1429
1430static void pci_rocker_uninit(PCIDevice *dev)
1431{
1432    Rocker *r = ROCKER(dev);
1433    int i;
1434
1435    QLIST_REMOVE(r, next);
1436
1437    for (i = 0; i < r->fp_ports; i++) {
1438        FpPort *port = r->fp_port[i];
1439
1440        fp_port_free(port);
1441        r->fp_port[i] = NULL;
1442    }
1443
1444    for (i = 0; i < rocker_pci_ring_count(r); i++) {
1445        if (r->rings[i]) {
1446            desc_ring_free(r->rings[i]);
1447        }
1448    }
1449    g_free(r->rings);
1450
1451    rocker_msix_uninit(r);
1452    object_unparent(OBJECT(&r->msix_bar));
1453    object_unparent(OBJECT(&r->mmio));
1454
1455    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1456        if (r->worlds[i]) {
1457            world_free(r->worlds[i]);
1458        }
1459    }
1460    g_free(r->fp_ports_peers);
1461}
1462
1463static void rocker_reset(DeviceState *dev)
1464{
1465    Rocker *r = ROCKER(dev);
1466    int i;
1467
1468    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1469        if (r->worlds[i]) {
1470            world_reset(r->worlds[i]);
1471        }
1472    }
1473    for (i = 0; i < r->fp_ports; i++) {
1474        fp_port_reset(r->fp_port[i]);
1475        fp_port_set_world(r->fp_port[i], r->world_dflt);
1476    }
1477
1478    r->test_reg = 0;
1479    r->test_reg64 = 0;
1480    r->test_dma_addr = 0;
1481    r->test_dma_size = 0;
1482
1483    for (i = 0; i < rocker_pci_ring_count(r); i++) {
1484        desc_ring_reset(r->rings[i]);
1485    }
1486
1487    DPRINTF("Reset done\n");
1488}
1489
1490static Property rocker_properties[] = {
1491    DEFINE_PROP_STRING("name", Rocker, name),
1492    DEFINE_PROP_STRING("world", Rocker, world_name),
1493    DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
1494                        fp_start_macaddr),
1495    DEFINE_PROP_UINT64("switch_id", Rocker,
1496                       switch_id, 0),
1497    DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
1498                      fp_ports_peers, qdev_prop_netdev, NICPeers),
1499    DEFINE_PROP_END_OF_LIST(),
1500};
1501
1502static const VMStateDescription rocker_vmsd = {
1503    .name = TYPE_ROCKER,
1504    .unmigratable = 1,
1505};
1506
1507static void rocker_class_init(ObjectClass *klass, void *data)
1508{
1509    DeviceClass *dc = DEVICE_CLASS(klass);
1510    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1511
1512    k->realize = pci_rocker_realize;
1513    k->exit = pci_rocker_uninit;
1514    k->vendor_id = PCI_VENDOR_ID_REDHAT;
1515    k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1516    k->revision = ROCKER_PCI_REVISION;
1517    k->class_id = PCI_CLASS_NETWORK_OTHER;
1518    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1519    dc->desc = "Rocker Switch";
1520    dc->reset = rocker_reset;
1521    dc->props = rocker_properties;
1522    dc->vmsd = &rocker_vmsd;
1523}
1524
1525static const TypeInfo rocker_info = {
1526    .name          = TYPE_ROCKER,
1527    .parent        = TYPE_PCI_DEVICE,
1528    .instance_size = sizeof(Rocker),
1529    .class_init    = rocker_class_init,
1530    .interfaces = (InterfaceInfo[]) {
1531        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1532        { },
1533    },
1534};
1535
1536static void rocker_register_types(void)
1537{
1538    type_register_static(&rocker_info);
1539}
1540
1541type_init(rocker_register_types)
1542