qemu/hw/net/rocker/rocker.c
   1/*
   2 * QEMU rocker switch emulation - PCI device
   3 *
   4 * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
   5 * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License as published by
   9 * the Free Software Foundation; either version 2 of the License, or
  10 * (at your option) any later version.
  11 *
  12 * This program is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 * GNU General Public License for more details.
  16 */
  17
  18#include "qemu/osdep.h"
  19#include "hw/hw.h"
  20#include "hw/pci/pci.h"
  21#include "hw/pci/msix.h"
  22#include "net/net.h"
  23#include "net/eth.h"
  24#include "qemu/iov.h"
  25#include "qemu/bitops.h"
  26#include "qmp-commands.h"
  27
  28#include "rocker.h"
  29#include "rocker_hw.h"
  30#include "rocker_fp.h"
  31#include "rocker_desc.h"
  32#include "rocker_tlv.h"
  33#include "rocker_world.h"
  34#include "rocker_of_dpa.h"
  35
  36struct rocker {
  37    /* private */
  38    PCIDevice parent_obj;
  39    /* public */
  40
  41    MemoryRegion mmio;
  42    MemoryRegion msix_bar;
  43
  44    /* switch configuration */
  45    char *name;                  /* switch name */
  46    char *world_name;            /* world name */
  47    uint32_t fp_ports;           /* front-panel port count */
  48    NICPeers *fp_ports_peers;
  49    MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
  50    uint64_t switch_id;          /* switch id */
  51
  52    /* front-panel ports */
  53    FpPort *fp_port[ROCKER_FP_PORTS_MAX];
  54
  55    /* register backings */
  56    uint32_t test_reg;
  57    uint64_t test_reg64;
  58    dma_addr_t test_dma_addr;
  59    uint32_t test_dma_size;
  60    uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */
  61
  62    /* desc rings */
  63    DescRing **rings;
  64
  65    /* switch worlds */
  66    World *worlds[ROCKER_WORLD_TYPE_MAX];
  67    World *world_dflt;
  68
  69    QLIST_ENTRY(rocker) next;
  70};
  71
  72#define ROCKER "rocker"
  73
  74#define to_rocker(obj) \
  75    OBJECT_CHECK(Rocker, (obj), ROCKER)
  76
  77static QLIST_HEAD(, rocker) rockers;
  78
  79Rocker *rocker_find(const char *name)
  80{
  81    Rocker *r;
  82
   83    QLIST_FOREACH(r, &rockers, next) {
   84        if (strcmp(r->name, name) == 0) {
   85            return r;
   86        }
   87    }
  88    return NULL;
  89}
  90
  91World *rocker_get_world(Rocker *r, enum rocker_world_type type)
  92{
  93    if (type < ROCKER_WORLD_TYPE_MAX) {
  94        return r->worlds[type];
  95    }
  96    return NULL;
  97}
  98
  99RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
 100{
 101    RockerSwitch *rocker;
 102    Rocker *r;
 103
 104    r = rocker_find(name);
 105    if (!r) {
 106        error_setg(errp, "rocker %s not found", name);
 107        return NULL;
 108    }
 109
 110    rocker = g_new0(RockerSwitch, 1);
 111    rocker->name = g_strdup(r->name);
 112    rocker->id = r->switch_id;
 113    rocker->ports = r->fp_ports;
 114
 115    return rocker;
 116}
 117
 118RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
 119{
 120    RockerPortList *list = NULL;
 121    Rocker *r;
 122    int i;
 123
 124    r = rocker_find(name);
 125    if (!r) {
 126        error_setg(errp, "rocker %s not found", name);
 127        return NULL;
 128    }
 129
 130    for (i = r->fp_ports - 1; i >= 0; i--) {
 131        RockerPortList *info = g_malloc0(sizeof(*info));
 132        info->value = g_malloc0(sizeof(*info->value));
 133        struct fp_port *port = r->fp_port[i];
 134
 135        fp_port_get_info(port, info);
 136        info->next = list;
 137        list = info;
 138    }
 139
 140    return list;
 141}
 142
 143uint32_t rocker_fp_ports(Rocker *r)
 144{
 145    return r->fp_ports;
 146}
 147
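     /*
      * Descriptor rings are laid out as: ring 0 = command ring, ring 1 =
      * event ring, then a tx/rx ring pair for each front-panel port (the
      * tx ring of port N is at index 2 + 2 * N, its rx ring right after).
      * Physical port numbers (pport) seen by the driver are 1-based, hence
      * the "- 2 ... + 1" arithmetic below.
      */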
 148static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
 149                                            DescRing *ring)
 150{
 151    return (desc_ring_index(ring) - 2) / 2 + 1;
 152}
 153
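     /*
      * Consume one tx descriptor posted by the driver. The descriptor
      * buffer is a TLV blob: ROCKER_TLV_TX_FRAGS nests one or more
      * ROCKER_TLV_TX_FRAG entries, each carrying a guest DMA address
      * (ROCKER_TLV_TX_FRAG_ATTR_ADDR) and length (ROCKER_TLV_TX_FRAG_ATTR_LEN).
      * Optional offload attributes (OFFLOAD, L3_CSUM_OFF, TSO_MSS,
      * TSO_HDR_LEN) are parsed but not acted on yet. Each fragment is
      * DMA-read into a local iovec and the assembled frame is handed to
      * the front-panel port for egress via fp_port_eg().
      */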
 154static int tx_consume(Rocker *r, DescInfo *info)
 155{
 156    PCIDevice *dev = PCI_DEVICE(r);
 157    char *buf = desc_get_buf(info, true);
 158    RockerTlv *tlv_frag;
 159    RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
 160    struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
 161    uint32_t pport;
 162    uint32_t port;
 163    uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
 164    uint16_t tx_l3_csum_off = 0;
 165    uint16_t tx_tso_mss = 0;
 166    uint16_t tx_tso_hdr_len = 0;
 167    int iovcnt = 0;
 168    int err = ROCKER_OK;
 169    int rem;
 170    int i;
 171
 172    if (!buf) {
 173        return -ROCKER_ENXIO;
 174    }
 175
 176    rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
 177
 178    if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
 179        return -ROCKER_EINVAL;
 180    }
 181
 182    pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
 183    if (!fp_port_from_pport(pport, &port)) {
 184        return -ROCKER_EINVAL;
 185    }
 186
 187    if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
 188        tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
 189    }
 190
 191    switch (tx_offload) {
 192    case ROCKER_TX_OFFLOAD_L3_CSUM:
 193        if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
 194            return -ROCKER_EINVAL;
 195        }
 196        break;
 197    case ROCKER_TX_OFFLOAD_TSO:
 198        if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
 199            !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
 200            return -ROCKER_EINVAL;
 201        }
 202        break;
 203    }
 204
 205    if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
 206        tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
 207    }
 208
 209    if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
 210        tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
 211    }
 212
 213    if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
 214        tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
 215    }
 216
 217    rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
 218        hwaddr frag_addr;
 219        uint16_t frag_len;
 220
 221        if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
 222            err = -ROCKER_EINVAL;
 223            goto err_bad_attr;
 224        }
 225
 226        rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
 227
 228        if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
 229            !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
 230            err = -ROCKER_EINVAL;
 231            goto err_bad_attr;
 232        }
 233
 234        frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
 235        frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
 236
  237        if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
                 err = -ROCKER_EINVAL;
  238            goto err_too_many_frags;
  239        }
 240        iov[iovcnt].iov_len = frag_len;
 241        iov[iovcnt].iov_base = g_malloc(frag_len);
 242        if (!iov[iovcnt].iov_base) {
 243            err = -ROCKER_ENOMEM;
 244            goto err_no_mem;
 245        }
 246
 247        pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
 248                     iov[iovcnt].iov_len);
 249
 250        iovcnt++;
 251    }
 252
 253    if (iovcnt) {
 254        /* XXX perform Tx offloads */
 255        /* XXX   silence compiler for now */
 256        tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
 257    }
 258
 259    err = fp_port_eg(r->fp_port[port], iov, iovcnt);
 260
 261err_too_many_frags:
 262err_no_mem:
 263err_bad_attr:
 264    for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
 265        g_free(iov[i].iov_base);
 266    }
 267
 268    return err;
 269}
 270
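     /*
      * Handle ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS: look up the port from
      * the pport attribute, then rewrite the descriptor buffer in place
      * with a ROCKER_TLV_CMD_INFO nest holding the port's speed, duplex,
      * autoneg, MAC address, world mode, learning flag and phys name.
      */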
 271static int cmd_get_port_settings(Rocker *r,
 272                                 DescInfo *info, char *buf,
 273                                 RockerTlv *cmd_info_tlv)
 274{
 275    RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
 276    RockerTlv *nest;
 277    FpPort *fp_port;
 278    uint32_t pport;
 279    uint32_t port;
 280    uint32_t speed;
 281    uint8_t duplex;
 282    uint8_t autoneg;
 283    uint8_t learning;
 284    char *phys_name;
 285    MACAddr macaddr;
 286    enum rocker_world_type mode;
 287    size_t tlv_size;
 288    int pos;
 289    int err;
 290
 291    rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
 292                            cmd_info_tlv);
 293
 294    if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
 295        return -ROCKER_EINVAL;
 296    }
 297
 298    pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
 299    if (!fp_port_from_pport(pport, &port)) {
 300        return -ROCKER_EINVAL;
 301    }
 302    fp_port = r->fp_port[port];
 303
 304    err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
 305    if (err) {
 306        return err;
 307    }
 308
 309    fp_port_get_macaddr(fp_port, &macaddr);
 310    mode = world_type(fp_port_get_world(fp_port));
 311    learning = fp_port_get_learning(fp_port);
 312    phys_name = fp_port_get_name(fp_port);
 313
 314    tlv_size = rocker_tlv_total_size(0) +                 /* nest */
 315               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 316               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
 317               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
 318               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
 319               rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
 320               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
 321               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
 322               rocker_tlv_total_size(strlen(phys_name));
 323
 324    if (tlv_size > desc_buf_size(info)) {
 325        return -ROCKER_EMSGSIZE;
 326    }
 327
 328    pos = 0;
 329    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
 330    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
 331    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
 332    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
 333    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
 334    rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
 335                   sizeof(macaddr.a), macaddr.a);
 336    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
 337    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
 338                      learning);
 339    rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
 340                   strlen(phys_name), phys_name);
 341    rocker_tlv_nest_end(buf, &pos, nest);
 342
 343    return desc_set_buf(info, tlv_size);
 344}
 345
 346static int cmd_set_port_settings(Rocker *r,
 347                                 RockerTlv *cmd_info_tlv)
 348{
 349    RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
 350    FpPort *fp_port;
 351    uint32_t pport;
 352    uint32_t port;
 353    uint32_t speed;
 354    uint8_t duplex;
 355    uint8_t autoneg;
 356    uint8_t learning;
 357    MACAddr macaddr;
 358    enum rocker_world_type mode;
 359    int err;
 360
 361    rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
 362                            cmd_info_tlv);
 363
 364    if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
 365        return -ROCKER_EINVAL;
 366    }
 367
 368    pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
 369    if (!fp_port_from_pport(pport, &port)) {
 370        return -ROCKER_EINVAL;
 371    }
 372    fp_port = r->fp_port[port];
 373
 374    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
 375        tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
 376        tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
 377
 378        speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
 379        duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
 380        autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
 381
 382        err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
 383        if (err) {
 384            return err;
 385        }
 386    }
 387
 388    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
 389        if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
 390            sizeof(macaddr.a)) {
 391            return -ROCKER_EINVAL;
 392        }
 393        memcpy(macaddr.a,
 394               rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
 395               sizeof(macaddr.a));
 396        fp_port_set_macaddr(fp_port, &macaddr);
 397    }
 398
 399    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
 400        mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
 401        if (mode >= ROCKER_WORLD_TYPE_MAX) {
 402            return -ROCKER_EINVAL;
 403        }
 404        /* We don't support world change. */
 405        if (!fp_port_check_world(fp_port, r->worlds[mode])) {
 406            return -ROCKER_EINVAL;
 407        }
 408    }
 409
 410    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
 411        learning =
 412            rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
 413        fp_port_set_learning(fp_port, learning);
 414    }
 415
 416    return ROCKER_OK;
 417}
 418
 419static int cmd_consume(Rocker *r, DescInfo *info)
 420{
 421    char *buf = desc_get_buf(info, false);
 422    RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
 423    RockerTlv *info_tlv;
 424    World *world;
 425    uint16_t cmd;
 426    int err;
 427
 428    if (!buf) {
 429        return -ROCKER_ENXIO;
 430    }
 431
 432    rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
 433
 434    if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
 435        return -ROCKER_EINVAL;
 436    }
 437
 438    cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
 439    info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
 440
  441    /* This might be reworked to something like this:
  442     * Every world would have an array of command handlers indexed from
  443     * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. It would be
  444     * up to each world to implement whatever commands it wants.
  445     * It could reference "generic" commands such as cmd_set_port_settings
  446     * or cmd_get_port_settings.
  447     */
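         /*
          * A sketch of that idea (not implemented; the handler table name is
          * hypothetical): each world could export something like
          *
          *   typedef int (*WorldCmdHandler)(World *world, DescInfo *info,
          *                                  char *buf, RockerTlv *cmd_info_tlv);
          *   WorldCmdHandler cmd_handlers[ROCKER_TLV_CMD_TYPE_MAX + 1];
          *
          * and the switch below would reduce to a table lookup plus a
          * NULL-handler check.
          */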
 448
 449    switch (cmd) {
 450    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
 451    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
 452    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
 453    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
 454    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
 455    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
 456    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
 457    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
 458        world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
 459        err = world_do_cmd(world, info, buf, cmd, info_tlv);
 460        break;
 461    case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
 462        err = cmd_get_port_settings(r, info, buf, info_tlv);
 463        break;
 464    case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
 465        err = cmd_set_port_settings(r, info_tlv);
 466        break;
 467    default:
 468        err = -ROCKER_EINVAL;
 469        break;
 470    }
 471
 472    return err;
 473}
 474
 475static void rocker_msix_irq(Rocker *r, unsigned vector)
 476{
 477    PCIDevice *dev = PCI_DEVICE(r);
 478
 479    DPRINTF("MSI-X notify request for vector %d\n", vector);
 480    if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
 481        DPRINTF("incorrect vector %d\n", vector);
 482        return;
 483    }
 484    msix_notify(dev, vector);
 485}
 486
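     /*
      * Post a link-changed event to the event ring: fetch a free event
      * descriptor, fill its buffer with an EVENT_TYPE_LINK_CHANGED TLV and
      * a nested pport/link-up info nest, then kick the event MSI-X vector.
      */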
 487int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
 488{
 489    DescRing *ring = r->rings[ROCKER_RING_EVENT];
 490    DescInfo *info = desc_ring_fetch_desc(ring);
 491    RockerTlv *nest;
 492    char *buf;
 493    size_t tlv_size;
 494    int pos;
 495    int err;
 496
 497    if (!info) {
 498        return -ROCKER_ENOBUFS;
 499    }
 500
 501    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
 502               rocker_tlv_total_size(0) +                 /* nest */
 503               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 504               rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
 505
 506    if (tlv_size > desc_buf_size(info)) {
 507        err = -ROCKER_EMSGSIZE;
 508        goto err_too_big;
 509    }
 510
 511    buf = desc_get_buf(info, false);
 512    if (!buf) {
 513        err = -ROCKER_ENOMEM;
 514        goto err_no_mem;
 515    }
 516
 517    pos = 0;
 518    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
 519                        ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
 520    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
 521    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
 522    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
 523                      link_up ? 1 : 0);
 524    rocker_tlv_nest_end(buf, &pos, nest);
 525
 526    err = desc_set_buf(info, tlv_size);
 527
 528err_too_big:
 529err_no_mem:
 530    if (desc_ring_post_desc(ring, err)) {
 531        rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
 532    }
 533
 534    return err;
 535}
 536
 537int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
 538                               uint16_t vlan_id)
 539{
 540    DescRing *ring = r->rings[ROCKER_RING_EVENT];
 541    DescInfo *info;
 542    FpPort *fp_port;
 543    uint32_t port;
 544    RockerTlv *nest;
 545    char *buf;
 546    size_t tlv_size;
 547    int pos;
 548    int err;
 549
 550    if (!fp_port_from_pport(pport, &port)) {
 551        return -ROCKER_EINVAL;
 552    }
 553    fp_port = r->fp_port[port];
 554    if (!fp_port_get_learning(fp_port)) {
 555        return ROCKER_OK;
 556    }
 557
 558    info = desc_ring_fetch_desc(ring);
 559    if (!info) {
 560        return -ROCKER_ENOBUFS;
 561    }
 562
 563    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
 564               rocker_tlv_total_size(0) +                 /* nest */
 565               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 566               rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
 567               rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
 568
 569    if (tlv_size > desc_buf_size(info)) {
 570        err = -ROCKER_EMSGSIZE;
 571        goto err_too_big;
 572    }
 573
 574    buf = desc_get_buf(info, false);
 575    if (!buf) {
 576        err = -ROCKER_ENOMEM;
 577        goto err_no_mem;
 578    }
 579
 580    pos = 0;
 581    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
 582                        ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
 583    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
 584    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
 585    rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
 586    rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
 587    rocker_tlv_nest_end(buf, &pos, nest);
 588
 589    err = desc_set_buf(info, tlv_size);
 590
 591err_too_big:
 592err_no_mem:
 593    if (desc_ring_post_desc(ring, err)) {
 594        rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
 595    }
 596
 597    return err;
 598}
 599
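     /*
      * Rings come in tx/rx pairs after the command and event rings, so the
      * rx ring of 1-based physical port pport sits at (pport - 1) * 2 + 3.
      */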
  600static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
  601                                             uint32_t pport)
 602{
 603    return r->rings[(pport - 1) * 2 + 3];
 604}
 605
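     /*
      * Deliver a received frame to the driver. The rx descriptor buffer was
      * pre-filled by the driver with a DMA buffer address
      * (ROCKER_TLV_RX_FRAG_ADDR) and its size (ROCKER_TLV_RX_FRAG_MAX_LEN);
      * the frame is DMA-written into that buffer and the descriptor buffer
      * is rewritten with flags, checksum and the actual fragment length
      * before the descriptor is posted back and the port's rx vector fires.
      */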
 606int rx_produce(World *world, uint32_t pport,
 607               const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
 608{
 609    Rocker *r = world_rocker(world);
 610    PCIDevice *dev = (PCIDevice *)r;
 611    DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
 612    DescInfo *info = desc_ring_fetch_desc(ring);
 613    char *data;
 614    size_t data_size = iov_size(iov, iovcnt);
 615    char *buf;
 616    uint16_t rx_flags = 0;
 617    uint16_t rx_csum = 0;
 618    size_t tlv_size;
 619    RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
 620    hwaddr frag_addr;
 621    uint16_t frag_max_len;
 622    int pos;
 623    int err;
 624
 625    if (!info) {
 626        return -ROCKER_ENOBUFS;
 627    }
 628
 629    buf = desc_get_buf(info, false);
 630    if (!buf) {
 631        err = -ROCKER_ENXIO;
 632        goto out;
 633    }
 634    rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
 635
 636    if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
 637        !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
 638        err = -ROCKER_EINVAL;
 639        goto out;
 640    }
 641
 642    frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
 643    frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
 644
 645    if (data_size > frag_max_len) {
 646        err = -ROCKER_EMSGSIZE;
 647        goto out;
 648    }
 649
 650    if (copy_to_cpu) {
 651        rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
 652    }
 653
 654    /* XXX calc rx flags/csum */
 655
 656    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
  657               rocker_tlv_total_size(sizeof(uint16_t)) + /* csum */
 658               rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
 659               rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
 660               rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
 661
 662    if (tlv_size > desc_buf_size(info)) {
 663        err = -ROCKER_EMSGSIZE;
 664        goto out;
 665    }
 666
  667    /* TODO:
  668     * The iov dma write can be optimized in a similar way to how e1000
  669     * does it in e1000_receive_iov. But maybe it would make sense to
  670     * introduce a generic helper, iov_dma_write.
  671     */
 672
 673    data = g_malloc(data_size);
 674    if (!data) {
 675        err = -ROCKER_ENOMEM;
 676        goto out;
 677    }
 678    iov_to_buf(iov, iovcnt, 0, data, data_size);
 679    pci_dma_write(dev, frag_addr, data, data_size);
 680    g_free(data);
 681
 682    pos = 0;
 683    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
 684    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
 685    rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
 686    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
 687    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
 688
 689    err = desc_set_buf(info, tlv_size);
 690
 691out:
 692    if (desc_ring_post_desc(ring, err)) {
 693        rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
 694    }
 695
 696    return err;
 697}
 698
 699int rocker_port_eg(Rocker *r, uint32_t pport,
 700                   const struct iovec *iov, int iovcnt)
 701{
 702    FpPort *fp_port;
 703    uint32_t port;
 704
 705    if (!fp_port_from_pport(pport, &port)) {
 706        return -ROCKER_EINVAL;
 707    }
 708
 709    fp_port = r->fp_port[port];
 710
 711    return fp_port_eg(fp_port, iov, iovcnt);
 712}
 713
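     /*
      * Test-DMA register protocol: the driver programs ROCKER_TEST_DMA_ADDR
      * and ROCKER_TEST_DMA_SIZE, then writes a CLEAR/FILL/INVERT op to
      * ROCKER_TEST_DMA_CTRL. The device rewrites the guest buffer
      * accordingly and signals completion on the test MSI-X vector.
      */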
 714static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
 715{
 716    PCIDevice *dev = PCI_DEVICE(r);
 717    char *buf;
 718    int i;
 719
 720    buf = g_malloc(r->test_dma_size);
 721
 722    if (!buf) {
 723        DPRINTF("test dma buffer alloc failed");
 724        return;
 725    }
 726
 727    switch (val) {
 728    case ROCKER_TEST_DMA_CTRL_CLEAR:
 729        memset(buf, 0, r->test_dma_size);
 730        break;
 731    case ROCKER_TEST_DMA_CTRL_FILL:
 732        memset(buf, 0x96, r->test_dma_size);
 733        break;
 734    case ROCKER_TEST_DMA_CTRL_INVERT:
 735        pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
 736        for (i = 0; i < r->test_dma_size; i++) {
 737            buf[i] = ~buf[i];
 738        }
 739        break;
 740    default:
  741        DPRINTF("unknown test dma control val=0x%08x\n", val);
 742        goto err_out;
 743    }
 744    pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
 745
 746    rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
 747
 748err_out:
 749    g_free(buf);
 750}
 751
 752static void rocker_reset(DeviceState *dev);
 753
 754static void rocker_control(Rocker *r, uint32_t val)
 755{
 756    if (val & ROCKER_CONTROL_RESET) {
 757        rocker_reset(DEVICE(r));
 758    }
 759}
 760
 761static int rocker_pci_ring_count(Rocker *r)
 762{
 763    /* There are:
 764     * - command ring
 765     * - event ring
  766     * - one tx ring and one rx ring per port
 767     */
 768    return 2 + (2 * r->fp_ports);
 769}
 770
 771static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
 772{
 773    hwaddr start = ROCKER_DMA_DESC_BASE;
 774    hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
 775
 776    return addr >= start && addr < end;
 777}
 778
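     /*
      * The port enable (and link status) registers use bit (port + 1) for
      * front-panel port "port"; bit 0 is unused. Only flip ports whose
      * enable bit differs from their current state.
      */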
 779static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
 780{
 781    int i;
 782    bool old_enabled;
 783    bool new_enabled;
 784    FpPort *fp_port;
 785
 786    for (i = 0; i < r->fp_ports; i++) {
 787        fp_port = r->fp_port[i];
 788        old_enabled = fp_port_enabled(fp_port);
 789        new_enabled = (new >> (i + 1)) & 0x1;
 790        if (new_enabled == old_enabled) {
 791            continue;
 792        }
 793        if (new_enabled) {
 794            fp_port_enable(r->fp_port[i]);
 795        } else {
 796            fp_port_disable(r->fp_port[i]);
 797        }
 798    }
 799}
 800
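     /*
      * 32-bit writes to 64-bit registers (ring base addresses, TEST_REG64,
      * TEST_DMA_ADDR, PORT_PHYS_ENABLE) arrive as two halves: the lower
      * 32 bits are latched in r->lower32 and the write of the upper half
      * at offset +4 commits the combined 64-bit value.
      */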
 801static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
 802{
 803    Rocker *r = opaque;
 804
 805    if (rocker_addr_is_desc_reg(r, addr)) {
 806        unsigned index = ROCKER_RING_INDEX(addr);
 807        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 808
 809        switch (offset) {
 810        case ROCKER_DMA_DESC_ADDR_OFFSET:
 811            r->lower32 = (uint64_t)val;
 812            break;
 813        case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
 814            desc_ring_set_base_addr(r->rings[index],
 815                                    ((uint64_t)val) << 32 | r->lower32);
 816            r->lower32 = 0;
 817            break;
 818        case ROCKER_DMA_DESC_SIZE_OFFSET:
 819            desc_ring_set_size(r->rings[index], val);
 820            break;
 821        case ROCKER_DMA_DESC_HEAD_OFFSET:
 822            if (desc_ring_set_head(r->rings[index], val)) {
 823                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
 824            }
 825            break;
 826        case ROCKER_DMA_DESC_CTRL_OFFSET:
 827            desc_ring_set_ctrl(r->rings[index], val);
 828            break;
 829        case ROCKER_DMA_DESC_CREDITS_OFFSET:
 830            if (desc_ring_ret_credits(r->rings[index], val)) {
 831                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
 832            }
 833            break;
 834        default:
 835            DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
 836                    " val=0x%08x (ring %d, addr=0x%02x)\n",
 837                    addr, val, index, offset);
 838            break;
 839        }
 840        return;
 841    }
 842
 843    switch (addr) {
 844    case ROCKER_TEST_REG:
 845        r->test_reg = val;
 846        break;
 847    case ROCKER_TEST_REG64:
 848    case ROCKER_TEST_DMA_ADDR:
 849    case ROCKER_PORT_PHYS_ENABLE:
 850        r->lower32 = (uint64_t)val;
 851        break;
 852    case ROCKER_TEST_REG64 + 4:
 853        r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
 854        r->lower32 = 0;
 855        break;
 856    case ROCKER_TEST_IRQ:
 857        rocker_msix_irq(r, val);
 858        break;
 859    case ROCKER_TEST_DMA_SIZE:
 860        r->test_dma_size = val & 0xFFFF;
 861        break;
 862    case ROCKER_TEST_DMA_ADDR + 4:
 863        r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
 864        r->lower32 = 0;
 865        break;
 866    case ROCKER_TEST_DMA_CTRL:
 867        rocker_test_dma_ctrl(r, val);
 868        break;
 869    case ROCKER_CONTROL:
 870        rocker_control(r, val);
 871        break;
 872    case ROCKER_PORT_PHYS_ENABLE + 4:
 873        rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
 874        r->lower32 = 0;
 875        break;
 876    default:
 877        DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
 878                " val=0x%08x\n", addr, val);
 879        break;
 880    }
 881}
 882
 883static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
 884{
 885    Rocker *r = opaque;
 886
 887    if (rocker_addr_is_desc_reg(r, addr)) {
 888        unsigned index = ROCKER_RING_INDEX(addr);
 889        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 890
 891        switch (offset) {
 892        case ROCKER_DMA_DESC_ADDR_OFFSET:
 893            desc_ring_set_base_addr(r->rings[index], val);
 894            break;
 895        default:
 896            DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
 897                    " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
 898                    addr, val, index, offset);
 899            break;
 900        }
 901        return;
 902    }
 903
 904    switch (addr) {
 905    case ROCKER_TEST_REG64:
 906        r->test_reg64 = val;
 907        break;
 908    case ROCKER_TEST_DMA_ADDR:
 909        r->test_dma_addr = val;
 910        break;
 911    case ROCKER_PORT_PHYS_ENABLE:
 912        rocker_port_phys_enable_write(r, val);
 913        break;
 914    default:
 915        DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
 916                " val=0x" TARGET_FMT_plx "\n", addr, val);
 917        break;
 918    }
 919}
 920
 921#ifdef DEBUG_ROCKER
 922#define regname(reg) case (reg): return #reg
 923static const char *rocker_reg_name(void *opaque, hwaddr addr)
 924{
 925    Rocker *r = opaque;
 926
 927    if (rocker_addr_is_desc_reg(r, addr)) {
 928        unsigned index = ROCKER_RING_INDEX(addr);
 929        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 930        static char buf[100];
 931        char ring_name[10];
 932
 933        switch (index) {
 934        case 0:
 935            sprintf(ring_name, "cmd");
 936            break;
 937        case 1:
 938            sprintf(ring_name, "event");
 939            break;
 940        default:
 941            sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
 942                    (index - 2) / 2);
 943        }
 944
 945        switch (offset) {
 946        case ROCKER_DMA_DESC_ADDR_OFFSET:
 947            sprintf(buf, "Ring[%s] ADDR", ring_name);
 948            return buf;
 949        case ROCKER_DMA_DESC_ADDR_OFFSET+4:
 950            sprintf(buf, "Ring[%s] ADDR+4", ring_name);
 951            return buf;
 952        case ROCKER_DMA_DESC_SIZE_OFFSET:
 953            sprintf(buf, "Ring[%s] SIZE", ring_name);
 954            return buf;
 955        case ROCKER_DMA_DESC_HEAD_OFFSET:
 956            sprintf(buf, "Ring[%s] HEAD", ring_name);
 957            return buf;
 958        case ROCKER_DMA_DESC_TAIL_OFFSET:
 959            sprintf(buf, "Ring[%s] TAIL", ring_name);
 960            return buf;
 961        case ROCKER_DMA_DESC_CTRL_OFFSET:
 962            sprintf(buf, "Ring[%s] CTRL", ring_name);
 963            return buf;
 964        case ROCKER_DMA_DESC_CREDITS_OFFSET:
 965            sprintf(buf, "Ring[%s] CREDITS", ring_name);
 966            return buf;
 967        default:
 968            sprintf(buf, "Ring[%s] ???", ring_name);
 969            return buf;
 970        }
 971    } else {
 972        switch (addr) {
 973            regname(ROCKER_BOGUS_REG0);
 974            regname(ROCKER_BOGUS_REG1);
 975            regname(ROCKER_BOGUS_REG2);
 976            regname(ROCKER_BOGUS_REG3);
 977            regname(ROCKER_TEST_REG);
 978            regname(ROCKER_TEST_REG64);
 979            regname(ROCKER_TEST_REG64+4);
 980            regname(ROCKER_TEST_IRQ);
 981            regname(ROCKER_TEST_DMA_ADDR);
 982            regname(ROCKER_TEST_DMA_ADDR+4);
 983            regname(ROCKER_TEST_DMA_SIZE);
 984            regname(ROCKER_TEST_DMA_CTRL);
 985            regname(ROCKER_CONTROL);
 986            regname(ROCKER_PORT_PHYS_COUNT);
 987            regname(ROCKER_PORT_PHYS_LINK_STATUS);
 988            regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
 989            regname(ROCKER_PORT_PHYS_ENABLE);
 990            regname(ROCKER_PORT_PHYS_ENABLE+4);
 991            regname(ROCKER_SWITCH_ID);
 992            regname(ROCKER_SWITCH_ID+4);
 993        }
 994    }
 995    return "???";
 996}
 997#else
 998static const char *rocker_reg_name(void *opaque, hwaddr addr)
 999{
1000    return NULL;
1001}
1002#endif
1003
1004static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1005                              unsigned size)
1006{
1007    DPRINTF("Write %s addr " TARGET_FMT_plx
1008            ", size %u, val " TARGET_FMT_plx "\n",
1009            rocker_reg_name(opaque, addr), addr, size, val);
1010
1011    switch (size) {
1012    case 4:
1013        rocker_io_writel(opaque, addr, val);
1014        break;
1015    case 8:
1016        rocker_io_writeq(opaque, addr, val);
1017        break;
1018    }
1019}
1020
1021static uint64_t rocker_port_phys_link_status(Rocker *r)
1022{
1023    int i;
1024    uint64_t status = 0;
1025
1026    for (i = 0; i < r->fp_ports; i++) {
1027        FpPort *port = r->fp_port[i];
1028
1029        if (fp_port_get_link_up(port)) {
 1030            status |= 1ULL << (i + 1);
1031        }
1032    }
1033    return status;
1034}
1035
1036static uint64_t rocker_port_phys_enable_read(Rocker *r)
1037{
1038    int i;
1039    uint64_t ret = 0;
1040
1041    for (i = 0; i < r->fp_ports; i++) {
1042        FpPort *port = r->fp_port[i];
1043
1044        if (fp_port_enabled(port)) {
 1045            ret |= 1ULL << (i + 1);
1046        }
1047    }
1048    return ret;
1049}
1050
1051static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1052{
1053    Rocker *r = opaque;
1054    uint32_t ret;
1055
1056    if (rocker_addr_is_desc_reg(r, addr)) {
1057        unsigned index = ROCKER_RING_INDEX(addr);
1058        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1059
1060        switch (offset) {
1061        case ROCKER_DMA_DESC_ADDR_OFFSET:
1062            ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1063            break;
1064        case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1065            ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1066            break;
1067        case ROCKER_DMA_DESC_SIZE_OFFSET:
1068            ret = desc_ring_get_size(r->rings[index]);
1069            break;
1070        case ROCKER_DMA_DESC_HEAD_OFFSET:
1071            ret = desc_ring_get_head(r->rings[index]);
1072            break;
1073        case ROCKER_DMA_DESC_TAIL_OFFSET:
1074            ret = desc_ring_get_tail(r->rings[index]);
1075            break;
1076        case ROCKER_DMA_DESC_CREDITS_OFFSET:
1077            ret = desc_ring_get_credits(r->rings[index]);
1078            break;
1079        default:
1080            DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
1081                    " (ring %d, addr=0x%02x)\n", addr, index, offset);
1082            ret = 0;
1083            break;
1084        }
1085        return ret;
1086    }
1087
1088    switch (addr) {
1089    case ROCKER_BOGUS_REG0:
1090    case ROCKER_BOGUS_REG1:
1091    case ROCKER_BOGUS_REG2:
1092    case ROCKER_BOGUS_REG3:
1093        ret = 0xDEADBABE;
1094        break;
1095    case ROCKER_TEST_REG:
1096        ret = r->test_reg * 2;
1097        break;
1098    case ROCKER_TEST_REG64:
1099        ret = (uint32_t)(r->test_reg64 * 2);
1100        break;
1101    case ROCKER_TEST_REG64 + 4:
1102        ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1103        break;
1104    case ROCKER_TEST_DMA_SIZE:
1105        ret = r->test_dma_size;
1106        break;
1107    case ROCKER_TEST_DMA_ADDR:
1108        ret = (uint32_t)r->test_dma_addr;
1109        break;
1110    case ROCKER_TEST_DMA_ADDR + 4:
1111        ret = (uint32_t)(r->test_dma_addr >> 32);
1112        break;
1113    case ROCKER_PORT_PHYS_COUNT:
1114        ret = r->fp_ports;
1115        break;
1116    case ROCKER_PORT_PHYS_LINK_STATUS:
1117        ret = (uint32_t)rocker_port_phys_link_status(r);
1118        break;
1119    case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1120        ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1121        break;
1122    case ROCKER_PORT_PHYS_ENABLE:
1123        ret = (uint32_t)rocker_port_phys_enable_read(r);
1124        break;
1125    case ROCKER_PORT_PHYS_ENABLE + 4:
1126        ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1127        break;
1128    case ROCKER_SWITCH_ID:
1129        ret = (uint32_t)r->switch_id;
1130        break;
1131    case ROCKER_SWITCH_ID + 4:
1132        ret = (uint32_t)(r->switch_id >> 32);
1133        break;
1134    default:
1135        DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1136        ret = 0;
1137        break;
1138    }
1139    return ret;
1140}
1141
1142static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1143{
1144    Rocker *r = opaque;
1145    uint64_t ret;
1146
1147    if (rocker_addr_is_desc_reg(r, addr)) {
1148        unsigned index = ROCKER_RING_INDEX(addr);
1149        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1150
 1151        switch (offset) {
1152        case ROCKER_DMA_DESC_ADDR_OFFSET:
1153            ret = desc_ring_get_base_addr(r->rings[index]);
1154            break;
1155        default:
1156            DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1157                    " (ring %d, addr=0x%02x)\n", addr, index, offset);
1158            ret = 0;
1159            break;
1160        }
1161        return ret;
1162    }
1163
1164    switch (addr) {
1165    case ROCKER_BOGUS_REG0:
1166    case ROCKER_BOGUS_REG2:
1167        ret = 0xDEADBABEDEADBABEULL;
1168        break;
1169    case ROCKER_TEST_REG64:
1170        ret = r->test_reg64 * 2;
1171        break;
1172    case ROCKER_TEST_DMA_ADDR:
1173        ret = r->test_dma_addr;
1174        break;
1175    case ROCKER_PORT_PHYS_LINK_STATUS:
1176        ret = rocker_port_phys_link_status(r);
1177        break;
1178    case ROCKER_PORT_PHYS_ENABLE:
1179        ret = rocker_port_phys_enable_read(r);
1180        break;
1181    case ROCKER_SWITCH_ID:
1182        ret = r->switch_id;
1183        break;
1184    default:
1185        DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1186        ret = 0;
1187        break;
1188    }
1189    return ret;
1190}
1191
1192static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1193{
1194    DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1195            rocker_reg_name(opaque, addr), addr, size);
1196
1197    switch (size) {
1198    case 4:
1199        return rocker_io_readl(opaque, addr);
1200    case 8:
1201        return rocker_io_readq(opaque, addr);
1202    }
1203
1204    return -1;
1205}
1206
1207static const MemoryRegionOps rocker_mmio_ops = {
1208    .read = rocker_mmio_read,
1209    .write = rocker_mmio_write,
1210    .endianness = DEVICE_LITTLE_ENDIAN,
1211    .valid = {
1212        .min_access_size = 4,
1213        .max_access_size = 8,
1214    },
1215    .impl = {
1216        .min_access_size = 4,
1217        .max_access_size = 8,
1218    },
1219};
1220
1221static void rocker_msix_vectors_unuse(Rocker *r,
1222                                      unsigned int num_vectors)
1223{
1224    PCIDevice *dev = PCI_DEVICE(r);
1225    int i;
1226
1227    for (i = 0; i < num_vectors; i++) {
1228        msix_vector_unuse(dev, i);
1229    }
1230}
1231
1232static int rocker_msix_vectors_use(Rocker *r,
1233                                   unsigned int num_vectors)
1234{
1235    PCIDevice *dev = PCI_DEVICE(r);
1236    int err;
1237    int i;
1238
1239    for (i = 0; i < num_vectors; i++) {
1240        err = msix_vector_use(dev, i);
1241        if (err) {
1242            goto rollback;
1243        }
1244    }
1245    return 0;
1246
1247rollback:
1248    rocker_msix_vectors_unuse(r, i);
1249    return err;
1250}
1251
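     /*
      * MSI-X setup: both the vector table and the PBA live in the dedicated
      * MSI-X BAR. ROCKER_MSIX_VEC_COUNT() (rocker_hw.h) sizes the vector
      * set, which covers the command ring, event ring, test interrupt and
      * a tx/rx vector pair per front-panel port.
      */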
1252static int rocker_msix_init(Rocker *r)
1253{
1254    PCIDevice *dev = PCI_DEVICE(r);
1255    int err;
1256    Error *local_err = NULL;
1257
1258    err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1259                    &r->msix_bar,
1260                    ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1261                    &r->msix_bar,
1262                    ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1263                    0, &local_err);
1264    if (err) {
1265        error_report_err(local_err);
1266        return err;
1267    }
1268
1269    err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1270    if (err) {
1271        goto err_msix_vectors_use;
1272    }
1273
1274    return 0;
1275
1276err_msix_vectors_use:
1277    msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1278    return err;
1279}
1280
1281static void rocker_msix_uninit(Rocker *r)
1282{
1283    PCIDevice *dev = PCI_DEVICE(r);
1284
1285    msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1286    rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1287}
1288
1289static World *rocker_world_type_by_name(Rocker *r, const char *name)
1290{
1291    int i;
1292
1293    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1294        if (strcmp(name, world_name(r->worlds[i])) == 0) {
1295            return r->worlds[i];
1296        }
1297    }
1298    return NULL;
1299}
1300
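     /*
      * Device init: allocate the worlds, expose BAR0 (register space) and
      * the MSI-X BAR, validate the switch properties (name, start MAC,
      * switch id, port count), create the descriptor rings, and finally
      * create the front-panel ports, each bound to the default world.
      */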
1301static int pci_rocker_init(PCIDevice *dev)
1302{
1303    Rocker *r = to_rocker(dev);
1304    const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1305    const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1306    static int sw_index;
1307    int i, err = 0;
1308
1309    /* allocate worlds */
1310
1311    r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1312
1313    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1314        if (!r->worlds[i]) {
1315            err = -ENOMEM;
1316            goto err_world_alloc;
1317        }
1318    }
1319
1320    if (!r->world_name) {
1321        r->world_name = g_strdup(world_name(r->worlds[ROCKER_WORLD_TYPE_OF_DPA]));
1322    }
1323
1324    r->world_dflt = rocker_world_type_by_name(r, r->world_name);
1325    if (!r->world_dflt) {
1326        fprintf(stderr,
1327                "rocker: requested world \"%s\" does not exist\n",
1328                r->world_name);
1329        err = -EINVAL;
1330        goto err_world_type_by_name;
1331    }
1332
1333    /* set up memory-mapped region at BAR0 */
1334
1335    memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1336                          "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1337    pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1338                     PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1339
1340    /* set up memory-mapped region for MSI-X */
1341
1342    memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1343                       ROCKER_PCI_MSIX_BAR_SIZE);
1344    pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1345                     PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1346
1347    /* MSI-X init */
1348
1349    err = rocker_msix_init(r);
1350    if (err) {
1351        goto err_msix_init;
1352    }
1353
1354    /* validate switch properties */
1355
1356    if (!r->name) {
1357        r->name = g_strdup(ROCKER);
1358    }
1359
1360    if (rocker_find(r->name)) {
1361        err = -EEXIST;
1362        goto err_duplicate;
1363    }
1364
 1365    /* The rocker name is passed to the OS in port name requests, with the
 1366     * intention that it be used in interface names. Limit the length of
 1367     * the rocker name to avoid naming problems in the OS. The port number
 1368     * is also appended as p# and an unganged breakout as b#, where # is at
 1369     * most 2 digits, so leave room for those too (-1 for the string
 1370     * terminator, -3 for p# and -3 for b#).
 1371     */
1372#define ROCKER_IFNAMSIZ 16
1373#define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
 1374    if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
 1375        fprintf(stderr,
 1376                "rocker: name too long; please shorten to at most %d chars\n",
 1377                MAX_ROCKER_NAME_LEN);
 1378        err = -EINVAL;
             goto err_name_too_long;
 1379    }
1380
1381    if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1382        memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1383        r->fp_start_macaddr.a[4] += (sw_index++);
1384    }
1385
1386    if (!r->switch_id) {
1387        memcpy(&r->switch_id, &r->fp_start_macaddr,
1388               sizeof(r->fp_start_macaddr));
1389    }
1390
1391    if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1392        r->fp_ports = ROCKER_FP_PORTS_MAX;
1393    }
1394
1395    r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
1396    if (!r->rings) {
1397        goto err_rings_alloc;
1398    }
1399
1400    /* Rings are ordered like this:
1401     * - command ring
1402     * - event ring
1403     * - port0 tx ring
1404     * - port0 rx ring
1405     * - port1 tx ring
1406     * - port1 rx ring
1407     * .....
1408     */
1409
1410    err = -ENOMEM;
1411    for (i = 0; i < rocker_pci_ring_count(r); i++) {
1412        DescRing *ring = desc_ring_alloc(r, i);
1413
1414        if (!ring) {
1415            goto err_ring_alloc;
1416        }
1417
1418        if (i == ROCKER_RING_CMD) {
1419            desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1420        } else if (i == ROCKER_RING_EVENT) {
1421            desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1422        } else if (i % 2 == 0) {
1423            desc_ring_set_consume(ring, tx_consume,
1424                                  ROCKER_MSIX_VEC_TX((i - 2) / 2));
1425        } else if (i % 2 == 1) {
1426            desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1427        }
1428
1429        r->rings[i] = ring;
1430    }
1431
1432    for (i = 0; i < r->fp_ports; i++) {
1433        FpPort *port =
1434            fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1435                          i, &r->fp_ports_peers[i]);
1436
1437        if (!port) {
1438            goto err_port_alloc;
1439        }
1440
1441        r->fp_port[i] = port;
1442        fp_port_set_world(port, r->world_dflt);
1443    }
1444
1445    QLIST_INSERT_HEAD(&rockers, r, next);
1446
1447    return 0;
1448
1449err_port_alloc:
1450    for (--i; i >= 0; i--) {
1451        FpPort *port = r->fp_port[i];
1452        fp_port_free(port);
1453    }
1454    i = rocker_pci_ring_count(r);
1455err_ring_alloc:
1456    for (--i; i >= 0; i--) {
1457        desc_ring_free(r->rings[i]);
1458    }
1459    g_free(r->rings);
1460err_rings_alloc:
 1461err_duplicate:
     err_name_too_long:
1462    rocker_msix_uninit(r);
1463err_msix_init:
1464    object_unparent(OBJECT(&r->msix_bar));
1465    object_unparent(OBJECT(&r->mmio));
1466err_world_type_by_name:
1467err_world_alloc:
1468    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1469        if (r->worlds[i]) {
1470            world_free(r->worlds[i]);
1471        }
1472    }
1473    return err;
1474}
1475
1476static void pci_rocker_uninit(PCIDevice *dev)
1477{
1478    Rocker *r = to_rocker(dev);
1479    int i;
1480
1481    QLIST_REMOVE(r, next);
1482
1483    for (i = 0; i < r->fp_ports; i++) {
1484        FpPort *port = r->fp_port[i];
1485
1486        fp_port_free(port);
1487        r->fp_port[i] = NULL;
1488    }
1489
1490    for (i = 0; i < rocker_pci_ring_count(r); i++) {
1491        if (r->rings[i]) {
1492            desc_ring_free(r->rings[i]);
1493        }
1494    }
1495    g_free(r->rings);
1496
1497    rocker_msix_uninit(r);
1498    object_unparent(OBJECT(&r->msix_bar));
1499    object_unparent(OBJECT(&r->mmio));
1500
1501    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1502        if (r->worlds[i]) {
1503            world_free(r->worlds[i]);
1504        }
1505    }
1506    g_free(r->fp_ports_peers);
1507}
1508
1509static void rocker_reset(DeviceState *dev)
1510{
1511    Rocker *r = to_rocker(dev);
1512    int i;
1513
1514    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1515        if (r->worlds[i]) {
1516            world_reset(r->worlds[i]);
1517        }
1518    }
1519    for (i = 0; i < r->fp_ports; i++) {
1520        fp_port_reset(r->fp_port[i]);
1521        fp_port_set_world(r->fp_port[i], r->world_dflt);
1522    }
1523
1524    r->test_reg = 0;
1525    r->test_reg64 = 0;
1526    r->test_dma_addr = 0;
1527    r->test_dma_size = 0;
1528
1529    for (i = 0; i < rocker_pci_ring_count(r); i++) {
1530        desc_ring_reset(r->rings[i]);
1531    }
1532
1533    DPRINTF("Reset done\n");
1534}
1535
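     /*
      * Illustrative command line only (property names follow the
      * DEFINE_PROP_* entries below; the array property expands to
      * len-ports/ports[N]):
      *
      *   -device rocker,name=sw1,len-ports=2,ports[0]=dev0,ports[1]=dev1
      *
      * where dev0/dev1 are netdev ids used as front-panel port peers.
      */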
1536static Property rocker_properties[] = {
1537    DEFINE_PROP_STRING("name", Rocker, name),
1538    DEFINE_PROP_STRING("world", Rocker, world_name),
1539    DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
1540                        fp_start_macaddr),
1541    DEFINE_PROP_UINT64("switch_id", Rocker,
1542                       switch_id, 0),
1543    DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
1544                      fp_ports_peers, qdev_prop_netdev, NICPeers),
1545    DEFINE_PROP_END_OF_LIST(),
1546};
1547
1548static const VMStateDescription rocker_vmsd = {
1549    .name = ROCKER,
1550    .unmigratable = 1,
1551};
1552
1553static void rocker_class_init(ObjectClass *klass, void *data)
1554{
1555    DeviceClass *dc = DEVICE_CLASS(klass);
1556    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1557
1558    k->init = pci_rocker_init;
1559    k->exit = pci_rocker_uninit;
1560    k->vendor_id = PCI_VENDOR_ID_REDHAT;
1561    k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1562    k->revision = ROCKER_PCI_REVISION;
1563    k->class_id = PCI_CLASS_NETWORK_OTHER;
1564    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1565    dc->desc = "Rocker Switch";
1566    dc->reset = rocker_reset;
1567    dc->props = rocker_properties;
1568    dc->vmsd = &rocker_vmsd;
1569}
1570
1571static const TypeInfo rocker_info = {
1572    .name          = ROCKER,
1573    .parent        = TYPE_PCI_DEVICE,
1574    .instance_size = sizeof(Rocker),
1575    .class_init    = rocker_class_init,
1576};
1577
1578static void rocker_register_types(void)
1579{
1580    type_register_static(&rocker_info);
1581}
1582
1583type_init(rocker_register_types)
1584