qemu/hw/net/rocker/rocker.c
<<
>>
Prefs
   1/*
   2 * QEMU rocker switch emulation - PCI device
   3 *
   4 * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
   5 * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License as published by
   9 * the Free Software Foundation; either version 2 of the License, or
  10 * (at your option) any later version.
  11 *
  12 * This program is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 * GNU General Public License for more details.
  16 */
  17
  18#include "qemu/osdep.h"
  19#include "hw/pci/pci.h"
  20#include "hw/qdev-properties.h"
  21#include "migration/vmstate.h"
  22#include "hw/pci/msix.h"
  23#include "net/net.h"
  24#include "net/eth.h"
  25#include "qapi/error.h"
  26#include "qapi/qapi-commands-rocker.h"
  27#include "qemu/iov.h"
  28#include "qemu/module.h"
  29#include "qemu/bitops.h"
  30#include "qemu/log.h"
  31
  32#include "rocker.h"
  33#include "rocker_hw.h"
  34#include "rocker_fp.h"
  35#include "rocker_desc.h"
  36#include "rocker_tlv.h"
  37#include "rocker_world.h"
  38#include "rocker_of_dpa.h"
  39
  40struct rocker {
  41    /* private */
  42    PCIDevice parent_obj;
  43    /* public */
  44
  45    MemoryRegion mmio;
  46    MemoryRegion msix_bar;
  47
  48    /* switch configuration */
  49    char *name;                  /* switch name */
  50    char *world_name;            /* world name */
  51    uint32_t fp_ports;           /* front-panel port count */
  52    NICPeers *fp_ports_peers;
  53    MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
  54    uint64_t switch_id;          /* switch id */
  55
  56    /* front-panel ports */
  57    FpPort *fp_port[ROCKER_FP_PORTS_MAX];
  58
  59    /* register backings */
  60    uint32_t test_reg;
  61    uint64_t test_reg64;
  62    dma_addr_t test_dma_addr;
  63    uint32_t test_dma_size;
  64    uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */
  65
  66    /* desc rings */
  67    DescRing **rings;
  68
  69    /* switch worlds */
  70    World *worlds[ROCKER_WORLD_TYPE_MAX];
  71    World *world_dflt;
  72
  73    QLIST_ENTRY(rocker) next;
  74};
  75
  76#define TYPE_ROCKER "rocker"
  77
  78#define ROCKER(obj) \
  79    OBJECT_CHECK(Rocker, (obj), TYPE_ROCKER)
  80
  81static QLIST_HEAD(, rocker) rockers;
  82
  83Rocker *rocker_find(const char *name)
  84{
  85    Rocker *r;
  86
  87    QLIST_FOREACH(r, &rockers, next)
  88        if (strcmp(r->name, name) == 0) {
  89            return r;
  90        }
  91
  92    return NULL;
  93}
  94
  95World *rocker_get_world(Rocker *r, enum rocker_world_type type)
  96{
  97    if (type < ROCKER_WORLD_TYPE_MAX) {
  98        return r->worlds[type];
  99    }
 100    return NULL;
 101}
 102
 103RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
 104{
 105    RockerSwitch *rocker;
 106    Rocker *r;
 107
 108    r = rocker_find(name);
 109    if (!r) {
 110        error_setg(errp, "rocker %s not found", name);
 111        return NULL;
 112    }
 113
 114    rocker = g_new0(RockerSwitch, 1);
 115    rocker->name = g_strdup(r->name);
 116    rocker->id = r->switch_id;
 117    rocker->ports = r->fp_ports;
 118
 119    return rocker;
 120}
 121
 122RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
 123{
 124    RockerPortList *list = NULL;
 125    Rocker *r;
 126    int i;
 127
 128    r = rocker_find(name);
 129    if (!r) {
 130        error_setg(errp, "rocker %s not found", name);
 131        return NULL;
 132    }
 133
 134    for (i = r->fp_ports - 1; i >= 0; i--) {
 135        RockerPortList *info = g_malloc0(sizeof(*info));
 136        info->value = g_malloc0(sizeof(*info->value));
 137        struct fp_port *port = r->fp_port[i];
 138
 139        fp_port_get_info(port, info);
 140        info->next = list;
 141        list = info;
 142    }
 143
 144    return list;
 145}
 146
 147uint32_t rocker_fp_ports(Rocker *r)
 148{
 149    return r->fp_ports;
 150}
 151
 152static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
 153                                            DescRing *ring)
 154{
 155    return (desc_ring_index(ring) - 2) / 2 + 1;
 156}
 157
 158static int tx_consume(Rocker *r, DescInfo *info)
 159{
 160    PCIDevice *dev = PCI_DEVICE(r);
 161    char *buf = desc_get_buf(info, true);
 162    RockerTlv *tlv_frag;
 163    RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
 164    struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
 165    uint32_t pport;
 166    uint32_t port;
 167    uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
 168    uint16_t tx_l3_csum_off = 0;
 169    uint16_t tx_tso_mss = 0;
 170    uint16_t tx_tso_hdr_len = 0;
 171    int iovcnt = 0;
 172    int err = ROCKER_OK;
 173    int rem;
 174    int i;
 175
 176    if (!buf) {
 177        return -ROCKER_ENXIO;
 178    }
 179
 180    rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
 181
 182    if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
 183        return -ROCKER_EINVAL;
 184    }
 185
 186    pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
 187    if (!fp_port_from_pport(pport, &port)) {
 188        return -ROCKER_EINVAL;
 189    }
 190
 191    if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
 192        tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
 193    }
 194
 195    switch (tx_offload) {
 196    case ROCKER_TX_OFFLOAD_L3_CSUM:
 197        if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
 198            return -ROCKER_EINVAL;
 199        }
 200        break;
 201    case ROCKER_TX_OFFLOAD_TSO:
 202        if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
 203            !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
 204            return -ROCKER_EINVAL;
 205        }
 206        break;
 207    }
 208
 209    if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
 210        tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
 211        qemu_log_mask(LOG_UNIMP, "rocker %s: L3 not implemented"
 212                                 " (cksum off: %u)\n",
 213                      __func__, tx_l3_csum_off);
 214    }
 215
 216    if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
 217        tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
 218        qemu_log_mask(LOG_UNIMP, "rocker %s: TSO not implemented (MSS: %u)\n",
 219                      __func__, tx_tso_mss);
 220    }
 221
 222    if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
 223        tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
 224        qemu_log_mask(LOG_UNIMP, "rocker %s: TSO not implemented"
 225                                 " (hdr length: %u)\n",
 226                      __func__, tx_tso_hdr_len);
 227    }
 228
 229    rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
 230        hwaddr frag_addr;
 231        uint16_t frag_len;
 232
 233        if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
 234            err = -ROCKER_EINVAL;
 235            goto err_bad_attr;
 236        }
 237
 238        rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
 239
 240        if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
 241            !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
 242            err = -ROCKER_EINVAL;
 243            goto err_bad_attr;
 244        }
 245
 246        frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
 247        frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
 248
 249        if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
 250            goto err_too_many_frags;
 251        }
 252        iov[iovcnt].iov_len = frag_len;
 253        iov[iovcnt].iov_base = g_malloc(frag_len);
 254
 255        pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
 256                     iov[iovcnt].iov_len);
 257
 258        iovcnt++;
 259    }
 260
 261    err = fp_port_eg(r->fp_port[port], iov, iovcnt);
 262
 263err_too_many_frags:
 264err_bad_attr:
 265    for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
 266        g_free(iov[i].iov_base);
 267    }
 268
 269    return err;
 270}
 271
 272static int cmd_get_port_settings(Rocker *r,
 273                                 DescInfo *info, char *buf,
 274                                 RockerTlv *cmd_info_tlv)
 275{
 276    RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
 277    RockerTlv *nest;
 278    FpPort *fp_port;
 279    uint32_t pport;
 280    uint32_t port;
 281    uint32_t speed;
 282    uint8_t duplex;
 283    uint8_t autoneg;
 284    uint8_t learning;
 285    char *phys_name;
 286    MACAddr macaddr;
 287    enum rocker_world_type mode;
 288    size_t tlv_size;
 289    int pos;
 290    int err;
 291
 292    rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
 293                            cmd_info_tlv);
 294
 295    if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
 296        return -ROCKER_EINVAL;
 297    }
 298
 299    pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
 300    if (!fp_port_from_pport(pport, &port)) {
 301        return -ROCKER_EINVAL;
 302    }
 303    fp_port = r->fp_port[port];
 304
 305    err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
 306    if (err) {
 307        return err;
 308    }
 309
 310    fp_port_get_macaddr(fp_port, &macaddr);
 311    mode = world_type(fp_port_get_world(fp_port));
 312    learning = fp_port_get_learning(fp_port);
 313    phys_name = fp_port_get_name(fp_port);
 314
 315    tlv_size = rocker_tlv_total_size(0) +                 /* nest */
 316               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 317               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
 318               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
 319               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
 320               rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
 321               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
 322               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
 323               rocker_tlv_total_size(strlen(phys_name));
 324
 325    if (tlv_size > desc_buf_size(info)) {
 326        return -ROCKER_EMSGSIZE;
 327    }
 328
 329    pos = 0;
 330    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
 331    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
 332    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
 333    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
 334    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
 335    rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
 336                   sizeof(macaddr.a), macaddr.a);
 337    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
 338    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
 339                      learning);
 340    rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
 341                   strlen(phys_name), phys_name);
 342    rocker_tlv_nest_end(buf, &pos, nest);
 343
 344    return desc_set_buf(info, tlv_size);
 345}
 346
 347static int cmd_set_port_settings(Rocker *r,
 348                                 RockerTlv *cmd_info_tlv)
 349{
 350    RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
 351    FpPort *fp_port;
 352    uint32_t pport;
 353    uint32_t port;
 354    uint32_t speed;
 355    uint8_t duplex;
 356    uint8_t autoneg;
 357    uint8_t learning;
 358    MACAddr macaddr;
 359    enum rocker_world_type mode;
 360    int err;
 361
 362    rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
 363                            cmd_info_tlv);
 364
 365    if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
 366        return -ROCKER_EINVAL;
 367    }
 368
 369    pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
 370    if (!fp_port_from_pport(pport, &port)) {
 371        return -ROCKER_EINVAL;
 372    }
 373    fp_port = r->fp_port[port];
 374
 375    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
 376        tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
 377        tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
 378
 379        speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
 380        duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
 381        autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
 382
 383        err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
 384        if (err) {
 385            return err;
 386        }
 387    }
 388
 389    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
 390        if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
 391            sizeof(macaddr.a)) {
 392            return -ROCKER_EINVAL;
 393        }
 394        memcpy(macaddr.a,
 395               rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
 396               sizeof(macaddr.a));
 397        fp_port_set_macaddr(fp_port, &macaddr);
 398    }
 399
 400    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
 401        mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
 402        if (mode >= ROCKER_WORLD_TYPE_MAX) {
 403            return -ROCKER_EINVAL;
 404        }
 405        /* We don't support world change. */
 406        if (!fp_port_check_world(fp_port, r->worlds[mode])) {
 407            return -ROCKER_EINVAL;
 408        }
 409    }
 410
 411    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
 412        learning =
 413            rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
 414        fp_port_set_learning(fp_port, learning);
 415    }
 416
 417    return ROCKER_OK;
 418}
 419
 420static int cmd_consume(Rocker *r, DescInfo *info)
 421{
 422    char *buf = desc_get_buf(info, false);
 423    RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
 424    RockerTlv *info_tlv;
 425    World *world;
 426    uint16_t cmd;
 427    int err;
 428
 429    if (!buf) {
 430        return -ROCKER_ENXIO;
 431    }
 432
 433    rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
 434
 435    if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
 436        return -ROCKER_EINVAL;
 437    }
 438
 439    cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
 440    info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
 441
 442    /* This might be reworked to something like this:
 443     * Every world will have an array of command handlers from
 444     * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is
 445     * up to each world to implement whatever command it want.
 446     * It can reference "generic" commands as cmd_set_port_settings or
 447     * cmd_get_port_settings
 448     */
 449
 450    switch (cmd) {
 451    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
 452    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
 453    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
 454    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
 455    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
 456    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
 457    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
 458    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
 459        world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
 460        err = world_do_cmd(world, info, buf, cmd, info_tlv);
 461        break;
 462    case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
 463        err = cmd_get_port_settings(r, info, buf, info_tlv);
 464        break;
 465    case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
 466        err = cmd_set_port_settings(r, info_tlv);
 467        break;
 468    default:
 469        err = -ROCKER_EINVAL;
 470        break;
 471    }
 472
 473    return err;
 474}
 475
 476static void rocker_msix_irq(Rocker *r, unsigned vector)
 477{
 478    PCIDevice *dev = PCI_DEVICE(r);
 479
 480    DPRINTF("MSI-X notify request for vector %d\n", vector);
 481    if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
 482        DPRINTF("incorrect vector %d\n", vector);
 483        return;
 484    }
 485    msix_notify(dev, vector);
 486}
 487
 488int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
 489{
 490    DescRing *ring = r->rings[ROCKER_RING_EVENT];
 491    DescInfo *info = desc_ring_fetch_desc(ring);
 492    RockerTlv *nest;
 493    char *buf;
 494    size_t tlv_size;
 495    int pos;
 496    int err;
 497
 498    if (!info) {
 499        return -ROCKER_ENOBUFS;
 500    }
 501
 502    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
 503               rocker_tlv_total_size(0) +                 /* nest */
 504               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 505               rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
 506
 507    if (tlv_size > desc_buf_size(info)) {
 508        err = -ROCKER_EMSGSIZE;
 509        goto err_too_big;
 510    }
 511
 512    buf = desc_get_buf(info, false);
 513    if (!buf) {
 514        err = -ROCKER_ENOMEM;
 515        goto err_no_mem;
 516    }
 517
 518    pos = 0;
 519    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
 520                        ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
 521    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
 522    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
 523    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
 524                      link_up ? 1 : 0);
 525    rocker_tlv_nest_end(buf, &pos, nest);
 526
 527    err = desc_set_buf(info, tlv_size);
 528
 529err_too_big:
 530err_no_mem:
 531    if (desc_ring_post_desc(ring, err)) {
 532        rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
 533    }
 534
 535    return err;
 536}
 537
 538int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
 539                               uint16_t vlan_id)
 540{
 541    DescRing *ring = r->rings[ROCKER_RING_EVENT];
 542    DescInfo *info;
 543    FpPort *fp_port;
 544    uint32_t port;
 545    RockerTlv *nest;
 546    char *buf;
 547    size_t tlv_size;
 548    int pos;
 549    int err;
 550
 551    if (!fp_port_from_pport(pport, &port)) {
 552        return -ROCKER_EINVAL;
 553    }
 554    fp_port = r->fp_port[port];
 555    if (!fp_port_get_learning(fp_port)) {
 556        return ROCKER_OK;
 557    }
 558
 559    info = desc_ring_fetch_desc(ring);
 560    if (!info) {
 561        return -ROCKER_ENOBUFS;
 562    }
 563
 564    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
 565               rocker_tlv_total_size(0) +                 /* nest */
 566               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 567               rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
 568               rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
 569
 570    if (tlv_size > desc_buf_size(info)) {
 571        err = -ROCKER_EMSGSIZE;
 572        goto err_too_big;
 573    }
 574
 575    buf = desc_get_buf(info, false);
 576    if (!buf) {
 577        err = -ROCKER_ENOMEM;
 578        goto err_no_mem;
 579    }
 580
 581    pos = 0;
 582    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
 583                        ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
 584    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
 585    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
 586    rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
 587    rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
 588    rocker_tlv_nest_end(buf, &pos, nest);
 589
 590    err = desc_set_buf(info, tlv_size);
 591
 592err_too_big:
 593err_no_mem:
 594    if (desc_ring_post_desc(ring, err)) {
 595        rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
 596    }
 597
 598    return err;
 599}
 600
 601static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
 602                                                     uint32_t pport)
 603{
 604    return r->rings[(pport - 1) * 2 + 3];
 605}
 606
 607int rx_produce(World *world, uint32_t pport,
 608               const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
 609{
 610    Rocker *r = world_rocker(world);
 611    PCIDevice *dev = (PCIDevice *)r;
 612    DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
 613    DescInfo *info = desc_ring_fetch_desc(ring);
 614    char *data;
 615    size_t data_size = iov_size(iov, iovcnt);
 616    char *buf;
 617    uint16_t rx_flags = 0;
 618    uint16_t rx_csum = 0;
 619    size_t tlv_size;
 620    RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
 621    hwaddr frag_addr;
 622    uint16_t frag_max_len;
 623    int pos;
 624    int err;
 625
 626    if (!info) {
 627        return -ROCKER_ENOBUFS;
 628    }
 629
 630    buf = desc_get_buf(info, false);
 631    if (!buf) {
 632        err = -ROCKER_ENXIO;
 633        goto out;
 634    }
 635    rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
 636
 637    if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
 638        !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
 639        err = -ROCKER_EINVAL;
 640        goto out;
 641    }
 642
 643    frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
 644    frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
 645
 646    if (data_size > frag_max_len) {
 647        err = -ROCKER_EMSGSIZE;
 648        goto out;
 649    }
 650
 651    if (copy_to_cpu) {
 652        rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
 653    }
 654
 655    /* XXX calc rx flags/csum */
 656
 657    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
 658               rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */
 659               rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
 660               rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
 661               rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
 662
 663    if (tlv_size > desc_buf_size(info)) {
 664        err = -ROCKER_EMSGSIZE;
 665        goto out;
 666    }
 667
 668    /* TODO:
 669     * iov dma write can be optimized in similar way e1000 does it in
 670     * e1000_receive_iov. But maybe if would make sense to introduce
 671     * generic helper iov_dma_write.
 672     */
 673
 674    data = g_malloc(data_size);
 675
 676    iov_to_buf(iov, iovcnt, 0, data, data_size);
 677    pci_dma_write(dev, frag_addr, data, data_size);
 678    g_free(data);
 679
 680    pos = 0;
 681    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
 682    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
 683    rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
 684    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
 685    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
 686
 687    err = desc_set_buf(info, tlv_size);
 688
 689out:
 690    if (desc_ring_post_desc(ring, err)) {
 691        rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
 692    }
 693
 694    return err;
 695}
 696
 697int rocker_port_eg(Rocker *r, uint32_t pport,
 698                   const struct iovec *iov, int iovcnt)
 699{
 700    FpPort *fp_port;
 701    uint32_t port;
 702
 703    if (!fp_port_from_pport(pport, &port)) {
 704        return -ROCKER_EINVAL;
 705    }
 706
 707    fp_port = r->fp_port[port];
 708
 709    return fp_port_eg(fp_port, iov, iovcnt);
 710}
 711
 712static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
 713{
 714    PCIDevice *dev = PCI_DEVICE(r);
 715    char *buf;
 716    int i;
 717
 718    buf = g_malloc(r->test_dma_size);
 719
 720    switch (val) {
 721    case ROCKER_TEST_DMA_CTRL_CLEAR:
 722        memset(buf, 0, r->test_dma_size);
 723        break;
 724    case ROCKER_TEST_DMA_CTRL_FILL:
 725        memset(buf, 0x96, r->test_dma_size);
 726        break;
 727    case ROCKER_TEST_DMA_CTRL_INVERT:
 728        pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
 729        for (i = 0; i < r->test_dma_size; i++) {
 730            buf[i] = ~buf[i];
 731        }
 732        break;
 733    default:
 734        DPRINTF("not test dma control val=0x%08x\n", val);
 735        goto err_out;
 736    }
 737    pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
 738
 739    rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
 740
 741err_out:
 742    g_free(buf);
 743}
 744
 745static void rocker_reset(DeviceState *dev);
 746
 747static void rocker_control(Rocker *r, uint32_t val)
 748{
 749    if (val & ROCKER_CONTROL_RESET) {
 750        rocker_reset(DEVICE(r));
 751    }
 752}
 753
 754static int rocker_pci_ring_count(Rocker *r)
 755{
 756    /* There are:
 757     * - command ring
 758     * - event ring
 759     * - tx and rx ring per each port
 760     */
 761    return 2 + (2 * r->fp_ports);
 762}
 763
 764static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
 765{
 766    hwaddr start = ROCKER_DMA_DESC_BASE;
 767    hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
 768
 769    return addr >= start && addr < end;
 770}
 771
 772static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
 773{
 774    int i;
 775    bool old_enabled;
 776    bool new_enabled;
 777    FpPort *fp_port;
 778
 779    for (i = 0; i < r->fp_ports; i++) {
 780        fp_port = r->fp_port[i];
 781        old_enabled = fp_port_enabled(fp_port);
 782        new_enabled = (new >> (i + 1)) & 0x1;
 783        if (new_enabled == old_enabled) {
 784            continue;
 785        }
 786        if (new_enabled) {
 787            fp_port_enable(r->fp_port[i]);
 788        } else {
 789            fp_port_disable(r->fp_port[i]);
 790        }
 791    }
 792}
 793
 794static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
 795{
 796    Rocker *r = opaque;
 797
 798    if (rocker_addr_is_desc_reg(r, addr)) {
 799        unsigned index = ROCKER_RING_INDEX(addr);
 800        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 801
 802        switch (offset) {
 803        case ROCKER_DMA_DESC_ADDR_OFFSET:
 804            r->lower32 = (uint64_t)val;
 805            break;
 806        case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
 807            desc_ring_set_base_addr(r->rings[index],
 808                                    ((uint64_t)val) << 32 | r->lower32);
 809            r->lower32 = 0;
 810            break;
 811        case ROCKER_DMA_DESC_SIZE_OFFSET:
 812            desc_ring_set_size(r->rings[index], val);
 813            break;
 814        case ROCKER_DMA_DESC_HEAD_OFFSET:
 815            if (desc_ring_set_head(r->rings[index], val)) {
 816                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
 817            }
 818            break;
 819        case ROCKER_DMA_DESC_CTRL_OFFSET:
 820            desc_ring_set_ctrl(r->rings[index], val);
 821            break;
 822        case ROCKER_DMA_DESC_CREDITS_OFFSET:
 823            if (desc_ring_ret_credits(r->rings[index], val)) {
 824                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
 825            }
 826            break;
 827        default:
 828            DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
 829                    " val=0x%08x (ring %d, addr=0x%02x)\n",
 830                    addr, val, index, offset);
 831            break;
 832        }
 833        return;
 834    }
 835
 836    switch (addr) {
 837    case ROCKER_TEST_REG:
 838        r->test_reg = val;
 839        break;
 840    case ROCKER_TEST_REG64:
 841    case ROCKER_TEST_DMA_ADDR:
 842    case ROCKER_PORT_PHYS_ENABLE:
 843        r->lower32 = (uint64_t)val;
 844        break;
 845    case ROCKER_TEST_REG64 + 4:
 846        r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
 847        r->lower32 = 0;
 848        break;
 849    case ROCKER_TEST_IRQ:
 850        rocker_msix_irq(r, val);
 851        break;
 852    case ROCKER_TEST_DMA_SIZE:
 853        r->test_dma_size = val & 0xFFFF;
 854        break;
 855    case ROCKER_TEST_DMA_ADDR + 4:
 856        r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
 857        r->lower32 = 0;
 858        break;
 859    case ROCKER_TEST_DMA_CTRL:
 860        rocker_test_dma_ctrl(r, val);
 861        break;
 862    case ROCKER_CONTROL:
 863        rocker_control(r, val);
 864        break;
 865    case ROCKER_PORT_PHYS_ENABLE + 4:
 866        rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
 867        r->lower32 = 0;
 868        break;
 869    default:
 870        DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
 871                " val=0x%08x\n", addr, val);
 872        break;
 873    }
 874}
 875
 876static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
 877{
 878    Rocker *r = opaque;
 879
 880    if (rocker_addr_is_desc_reg(r, addr)) {
 881        unsigned index = ROCKER_RING_INDEX(addr);
 882        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 883
 884        switch (offset) {
 885        case ROCKER_DMA_DESC_ADDR_OFFSET:
 886            desc_ring_set_base_addr(r->rings[index], val);
 887            break;
 888        default:
 889            DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
 890                    " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
 891                    addr, val, index, offset);
 892            break;
 893        }
 894        return;
 895    }
 896
 897    switch (addr) {
 898    case ROCKER_TEST_REG64:
 899        r->test_reg64 = val;
 900        break;
 901    case ROCKER_TEST_DMA_ADDR:
 902        r->test_dma_addr = val;
 903        break;
 904    case ROCKER_PORT_PHYS_ENABLE:
 905        rocker_port_phys_enable_write(r, val);
 906        break;
 907    default:
 908        DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
 909                " val=0x" TARGET_FMT_plx "\n", addr, val);
 910        break;
 911    }
 912}
 913
 914#ifdef DEBUG_ROCKER
 915#define regname(reg) case (reg): return #reg
 916static const char *rocker_reg_name(void *opaque, hwaddr addr)
 917{
 918    Rocker *r = opaque;
 919
 920    if (rocker_addr_is_desc_reg(r, addr)) {
 921        unsigned index = ROCKER_RING_INDEX(addr);
 922        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 923        static char buf[100];
 924        char ring_name[10];
 925
 926        switch (index) {
 927        case 0:
 928            sprintf(ring_name, "cmd");
 929            break;
 930        case 1:
 931            sprintf(ring_name, "event");
 932            break;
 933        default:
 934            sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
 935                    (index - 2) / 2);
 936        }
 937
 938        switch (offset) {
 939        case ROCKER_DMA_DESC_ADDR_OFFSET:
 940            sprintf(buf, "Ring[%s] ADDR", ring_name);
 941            return buf;
 942        case ROCKER_DMA_DESC_ADDR_OFFSET+4:
 943            sprintf(buf, "Ring[%s] ADDR+4", ring_name);
 944            return buf;
 945        case ROCKER_DMA_DESC_SIZE_OFFSET:
 946            sprintf(buf, "Ring[%s] SIZE", ring_name);
 947            return buf;
 948        case ROCKER_DMA_DESC_HEAD_OFFSET:
 949            sprintf(buf, "Ring[%s] HEAD", ring_name);
 950            return buf;
 951        case ROCKER_DMA_DESC_TAIL_OFFSET:
 952            sprintf(buf, "Ring[%s] TAIL", ring_name);
 953            return buf;
 954        case ROCKER_DMA_DESC_CTRL_OFFSET:
 955            sprintf(buf, "Ring[%s] CTRL", ring_name);
 956            return buf;
 957        case ROCKER_DMA_DESC_CREDITS_OFFSET:
 958            sprintf(buf, "Ring[%s] CREDITS", ring_name);
 959            return buf;
 960        default:
 961            sprintf(buf, "Ring[%s] ???", ring_name);
 962            return buf;
 963        }
 964    } else {
 965        switch (addr) {
 966            regname(ROCKER_BOGUS_REG0);
 967            regname(ROCKER_BOGUS_REG1);
 968            regname(ROCKER_BOGUS_REG2);
 969            regname(ROCKER_BOGUS_REG3);
 970            regname(ROCKER_TEST_REG);
 971            regname(ROCKER_TEST_REG64);
 972            regname(ROCKER_TEST_REG64+4);
 973            regname(ROCKER_TEST_IRQ);
 974            regname(ROCKER_TEST_DMA_ADDR);
 975            regname(ROCKER_TEST_DMA_ADDR+4);
 976            regname(ROCKER_TEST_DMA_SIZE);
 977            regname(ROCKER_TEST_DMA_CTRL);
 978            regname(ROCKER_CONTROL);
 979            regname(ROCKER_PORT_PHYS_COUNT);
 980            regname(ROCKER_PORT_PHYS_LINK_STATUS);
 981            regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
 982            regname(ROCKER_PORT_PHYS_ENABLE);
 983            regname(ROCKER_PORT_PHYS_ENABLE+4);
 984            regname(ROCKER_SWITCH_ID);
 985            regname(ROCKER_SWITCH_ID+4);
 986        }
 987    }
 988    return "???";
 989}
 990#else
 991static const char *rocker_reg_name(void *opaque, hwaddr addr)
 992{
 993    return NULL;
 994}
 995#endif
 996
 997static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
 998                              unsigned size)
 999{
1000    DPRINTF("Write %s addr " TARGET_FMT_plx
1001            ", size %u, val " TARGET_FMT_plx "\n",
1002            rocker_reg_name(opaque, addr), addr, size, val);
1003
1004    switch (size) {
1005    case 4:
1006        rocker_io_writel(opaque, addr, val);
1007        break;
1008    case 8:
1009        rocker_io_writeq(opaque, addr, val);
1010        break;
1011    }
1012}
1013
1014static uint64_t rocker_port_phys_link_status(Rocker *r)
1015{
1016    int i;
1017    uint64_t status = 0;
1018
1019    for (i = 0; i < r->fp_ports; i++) {
1020        FpPort *port = r->fp_port[i];
1021
1022        if (fp_port_get_link_up(port)) {
1023            status |= 1 << (i + 1);
1024        }
1025    }
1026    return status;
1027}
1028
1029static uint64_t rocker_port_phys_enable_read(Rocker *r)
1030{
1031    int i;
1032    uint64_t ret = 0;
1033
1034    for (i = 0; i < r->fp_ports; i++) {
1035        FpPort *port = r->fp_port[i];
1036
1037        if (fp_port_enabled(port)) {
1038            ret |= 1 << (i + 1);
1039        }
1040    }
1041    return ret;
1042}
1043
1044static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1045{
1046    Rocker *r = opaque;
1047    uint32_t ret;
1048
1049    if (rocker_addr_is_desc_reg(r, addr)) {
1050        unsigned index = ROCKER_RING_INDEX(addr);
1051        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1052
1053        switch (offset) {
1054        case ROCKER_DMA_DESC_ADDR_OFFSET:
1055            ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1056            break;
1057        case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1058            ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1059            break;
1060        case ROCKER_DMA_DESC_SIZE_OFFSET:
1061            ret = desc_ring_get_size(r->rings[index]);
1062            break;
1063        case ROCKER_DMA_DESC_HEAD_OFFSET:
1064            ret = desc_ring_get_head(r->rings[index]);
1065            break;
1066        case ROCKER_DMA_DESC_TAIL_OFFSET:
1067            ret = desc_ring_get_tail(r->rings[index]);
1068            break;
1069        case ROCKER_DMA_DESC_CREDITS_OFFSET:
1070            ret = desc_ring_get_credits(r->rings[index]);
1071            break;
1072        default:
1073            DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
1074                    " (ring %d, addr=0x%02x)\n", addr, index, offset);
1075            ret = 0;
1076            break;
1077        }
1078        return ret;
1079    }
1080
1081    switch (addr) {
1082    case ROCKER_BOGUS_REG0:
1083    case ROCKER_BOGUS_REG1:
1084    case ROCKER_BOGUS_REG2:
1085    case ROCKER_BOGUS_REG3:
1086        ret = 0xDEADBABE;
1087        break;
1088    case ROCKER_TEST_REG:
1089        ret = r->test_reg * 2;
1090        break;
1091    case ROCKER_TEST_REG64:
1092        ret = (uint32_t)(r->test_reg64 * 2);
1093        break;
1094    case ROCKER_TEST_REG64 + 4:
1095        ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1096        break;
1097    case ROCKER_TEST_DMA_SIZE:
1098        ret = r->test_dma_size;
1099        break;
1100    case ROCKER_TEST_DMA_ADDR:
1101        ret = (uint32_t)r->test_dma_addr;
1102        break;
1103    case ROCKER_TEST_DMA_ADDR + 4:
1104        ret = (uint32_t)(r->test_dma_addr >> 32);
1105        break;
1106    case ROCKER_PORT_PHYS_COUNT:
1107        ret = r->fp_ports;
1108        break;
1109    case ROCKER_PORT_PHYS_LINK_STATUS:
1110        ret = (uint32_t)rocker_port_phys_link_status(r);
1111        break;
1112    case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1113        ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1114        break;
1115    case ROCKER_PORT_PHYS_ENABLE:
1116        ret = (uint32_t)rocker_port_phys_enable_read(r);
1117        break;
1118    case ROCKER_PORT_PHYS_ENABLE + 4:
1119        ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1120        break;
1121    case ROCKER_SWITCH_ID:
1122        ret = (uint32_t)r->switch_id;
1123        break;
1124    case ROCKER_SWITCH_ID + 4:
1125        ret = (uint32_t)(r->switch_id >> 32);
1126        break;
1127    default:
1128        DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1129        ret = 0;
1130        break;
1131    }
1132    return ret;
1133}
1134
1135static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1136{
1137    Rocker *r = opaque;
1138    uint64_t ret;
1139
1140    if (rocker_addr_is_desc_reg(r, addr)) {
1141        unsigned index = ROCKER_RING_INDEX(addr);
1142        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1143
1144        switch (addr & ROCKER_DMA_DESC_MASK) {
1145        case ROCKER_DMA_DESC_ADDR_OFFSET:
1146            ret = desc_ring_get_base_addr(r->rings[index]);
1147            break;
1148        default:
1149            DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1150                    " (ring %d, addr=0x%02x)\n", addr, index, offset);
1151            ret = 0;
1152            break;
1153        }
1154        return ret;
1155    }
1156
1157    switch (addr) {
1158    case ROCKER_BOGUS_REG0:
1159    case ROCKER_BOGUS_REG2:
1160        ret = 0xDEADBABEDEADBABEULL;
1161        break;
1162    case ROCKER_TEST_REG64:
1163        ret = r->test_reg64 * 2;
1164        break;
1165    case ROCKER_TEST_DMA_ADDR:
1166        ret = r->test_dma_addr;
1167        break;
1168    case ROCKER_PORT_PHYS_LINK_STATUS:
1169        ret = rocker_port_phys_link_status(r);
1170        break;
1171    case ROCKER_PORT_PHYS_ENABLE:
1172        ret = rocker_port_phys_enable_read(r);
1173        break;
1174    case ROCKER_SWITCH_ID:
1175        ret = r->switch_id;
1176        break;
1177    default:
1178        DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1179        ret = 0;
1180        break;
1181    }
1182    return ret;
1183}
1184
1185static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1186{
1187    DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1188            rocker_reg_name(opaque, addr), addr, size);
1189
1190    switch (size) {
1191    case 4:
1192        return rocker_io_readl(opaque, addr);
1193    case 8:
1194        return rocker_io_readq(opaque, addr);
1195    }
1196
1197    return -1;
1198}
1199
/*
 * Access callbacks for the register MMIO region.  Registers are
 * little-endian, and both the accepted (.valid) and the implemented (.impl)
 * access sizes are limited to 4..8 bytes.
 */
static const MemoryRegionOps rocker_mmio_ops = {
    .read = rocker_mmio_read,
    .write = rocker_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    },
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
    },
};
1213
1214static void rocker_msix_vectors_unuse(Rocker *r,
1215                                      unsigned int num_vectors)
1216{
1217    PCIDevice *dev = PCI_DEVICE(r);
1218    int i;
1219
1220    for (i = 0; i < num_vectors; i++) {
1221        msix_vector_unuse(dev, i);
1222    }
1223}
1224
1225static int rocker_msix_vectors_use(Rocker *r,
1226                                   unsigned int num_vectors)
1227{
1228    PCIDevice *dev = PCI_DEVICE(r);
1229    int err;
1230    int i;
1231
1232    for (i = 0; i < num_vectors; i++) {
1233        err = msix_vector_use(dev, i);
1234        if (err) {
1235            goto rollback;
1236        }
1237    }
1238    return 0;
1239
1240rollback:
1241    rocker_msix_vectors_unuse(r, i);
1242    return err;
1243}
1244
1245static int rocker_msix_init(Rocker *r, Error **errp)
1246{
1247    PCIDevice *dev = PCI_DEVICE(r);
1248    int err;
1249
1250    err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1251                    &r->msix_bar,
1252                    ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1253                    &r->msix_bar,
1254                    ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1255                    0, errp);
1256    if (err) {
1257        return err;
1258    }
1259
1260    err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1261    if (err) {
1262        goto err_msix_vectors_use;
1263    }
1264
1265    return 0;
1266
1267err_msix_vectors_use:
1268    msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1269    return err;
1270}
1271
1272static void rocker_msix_uninit(Rocker *r)
1273{
1274    PCIDevice *dev = PCI_DEVICE(r);
1275
1276    msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1277    rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1278}
1279
1280static World *rocker_world_type_by_name(Rocker *r, const char *name)
1281{
1282    int i;
1283
1284    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1285        if (strcmp(name, world_name(r->worlds[i])) == 0) {
1286            return r->worlds[i];
1287        }
1288    }
1289    return NULL;
1290}
1291
1292static void pci_rocker_realize(PCIDevice *dev, Error **errp)
1293{
1294    Rocker *r = ROCKER(dev);
1295    const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1296    const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1297    static int sw_index;
1298    int i, err = 0;
1299
1300    /* allocate worlds */
1301
1302    r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1303
1304    if (!r->world_name) {
1305        r->world_name = g_strdup(world_name(r->worlds[ROCKER_WORLD_TYPE_OF_DPA]));
1306    }
1307
1308    r->world_dflt = rocker_world_type_by_name(r, r->world_name);
1309    if (!r->world_dflt) {
1310        error_setg(errp,
1311                "invalid argument requested world %s does not exist",
1312                r->world_name);
1313        goto err_world_type_by_name;
1314    }
1315
1316    /* set up memory-mapped region at BAR0 */
1317
1318    memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1319                          "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1320    pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1321                     PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1322
1323    /* set up memory-mapped region for MSI-X */
1324
1325    memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1326                       ROCKER_PCI_MSIX_BAR_SIZE);
1327    pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1328                     PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1329
1330    /* MSI-X init */
1331
1332    err = rocker_msix_init(r, errp);
1333    if (err) {
1334        goto err_msix_init;
1335    }
1336
1337    /* validate switch properties */
1338
1339    if (!r->name) {
1340        r->name = g_strdup(TYPE_ROCKER);
1341    }
1342
1343    if (rocker_find(r->name)) {
1344        error_setg(errp, "%s already exists", r->name);
1345        goto err_duplicate;
1346    }
1347
1348    /* Rocker name is passed in port name requests to OS with the intention
1349     * that the name is used in interface names. Limit the length of the
1350     * rocker name to avoid naming problems in the OS. Also, adding the
1351     * port number as p# and unganged breakout b#, where # is at most 2
1352     * digits, so leave room for it too (-1 for string terminator, -3 for
1353     * p# and -3 for b#)
1354     */
1355#define ROCKER_IFNAMSIZ 16
1356#define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1357    if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1358        error_setg(errp,
1359                "name too long; please shorten to at most %d chars",
1360                MAX_ROCKER_NAME_LEN);
1361        goto err_name_too_long;
1362    }
1363
1364    if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1365        memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1366        r->fp_start_macaddr.a[4] += (sw_index++);
1367    }
1368
1369    if (!r->switch_id) {
1370        memcpy(&r->switch_id, &r->fp_start_macaddr,
1371               sizeof(r->fp_start_macaddr));
1372    }
1373
1374    if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1375        r->fp_ports = ROCKER_FP_PORTS_MAX;
1376    }
1377
1378    r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
1379
1380    /* Rings are ordered like this:
1381     * - command ring
1382     * - event ring
1383     * - port0 tx ring
1384     * - port0 rx ring
1385     * - port1 tx ring
1386     * - port1 rx ring
1387     * .....
1388     */
1389
1390    for (i = 0; i < rocker_pci_ring_count(r); i++) {
1391        DescRing *ring = desc_ring_alloc(r, i);
1392
1393        if (i == ROCKER_RING_CMD) {
1394            desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1395        } else if (i == ROCKER_RING_EVENT) {
1396            desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1397        } else if (i % 2 == 0) {
1398            desc_ring_set_consume(ring, tx_consume,
1399                                  ROCKER_MSIX_VEC_TX((i - 2) / 2));
1400        } else if (i % 2 == 1) {
1401            desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1402        }
1403
1404        r->rings[i] = ring;
1405    }
1406
1407    for (i = 0; i < r->fp_ports; i++) {
1408        FpPort *port =
1409            fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1410                          i, &r->fp_ports_peers[i]);
1411
1412        r->fp_port[i] = port;
1413        fp_port_set_world(port, r->world_dflt);
1414    }
1415
1416    QLIST_INSERT_HEAD(&rockers, r, next);
1417
1418    return;
1419
1420err_name_too_long:
1421err_duplicate:
1422    rocker_msix_uninit(r);
1423err_msix_init:
1424    object_unparent(OBJECT(&r->msix_bar));
1425    object_unparent(OBJECT(&r->mmio));
1426err_world_type_by_name:
1427    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1428        if (r->worlds[i]) {
1429            world_free(r->worlds[i]);
1430        }
1431    }
1432}
1433
1434static void pci_rocker_uninit(PCIDevice *dev)
1435{
1436    Rocker *r = ROCKER(dev);
1437    int i;
1438
1439    QLIST_REMOVE(r, next);
1440
1441    for (i = 0; i < r->fp_ports; i++) {
1442        FpPort *port = r->fp_port[i];
1443
1444        fp_port_free(port);
1445        r->fp_port[i] = NULL;
1446    }
1447
1448    for (i = 0; i < rocker_pci_ring_count(r); i++) {
1449        if (r->rings[i]) {
1450            desc_ring_free(r->rings[i]);
1451        }
1452    }
1453    g_free(r->rings);
1454
1455    rocker_msix_uninit(r);
1456    object_unparent(OBJECT(&r->msix_bar));
1457    object_unparent(OBJECT(&r->mmio));
1458
1459    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1460        if (r->worlds[i]) {
1461            world_free(r->worlds[i]);
1462        }
1463    }
1464    g_free(r->fp_ports_peers);
1465}
1466
1467static void rocker_reset(DeviceState *dev)
1468{
1469    Rocker *r = ROCKER(dev);
1470    int i;
1471
1472    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1473        if (r->worlds[i]) {
1474            world_reset(r->worlds[i]);
1475        }
1476    }
1477    for (i = 0; i < r->fp_ports; i++) {
1478        fp_port_reset(r->fp_port[i]);
1479        fp_port_set_world(r->fp_port[i], r->world_dflt);
1480    }
1481
1482    r->test_reg = 0;
1483    r->test_reg64 = 0;
1484    r->test_dma_addr = 0;
1485    r->test_dma_size = 0;
1486
1487    for (i = 0; i < rocker_pci_ring_count(r); i++) {
1488        desc_ring_reset(r->rings[i]);
1489    }
1490
1491    DPRINTF("Reset done\n");
1492}
1493
/*
 * User-configurable device properties:
 *   name             - switch name (Rocker.name)
 *   world            - name of the default world (Rocker.world_name)
 *   fp_start_macaddr - MAC address of front-panel port 0
 *   switch_id        - switch id, default 0
 *   ports            - array of netdev peers; its length is stored in
 *                      Rocker.fp_ports, its elements in Rocker.fp_ports_peers
 */
static Property rocker_properties[] = {
    DEFINE_PROP_STRING("name", Rocker, name),
    DEFINE_PROP_STRING("world", Rocker, world_name),
    DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
                        fp_start_macaddr),
    DEFINE_PROP_UINT64("switch_id", Rocker,
                       switch_id, 0),
    DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
                      fp_ports_peers, qdev_prop_netdev, NICPeers),
    DEFINE_PROP_END_OF_LIST(),
};
1505
/* Migration description: the device is flagged as unmigratable. */
static const VMStateDescription rocker_vmsd = {
    .name = TYPE_ROCKER,
    .unmigratable = 1,
};
1510
1511static void rocker_class_init(ObjectClass *klass, void *data)
1512{
1513    DeviceClass *dc = DEVICE_CLASS(klass);
1514    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1515
1516    k->realize = pci_rocker_realize;
1517    k->exit = pci_rocker_uninit;
1518    k->vendor_id = PCI_VENDOR_ID_REDHAT;
1519    k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1520    k->revision = ROCKER_PCI_REVISION;
1521    k->class_id = PCI_CLASS_NETWORK_OTHER;
1522    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1523    dc->desc = "Rocker Switch";
1524    dc->reset = rocker_reset;
1525    device_class_set_props(dc, rocker_properties);
1526    dc->vmsd = &rocker_vmsd;
1527}
1528
/*
 * Type registration record: TYPE_ROCKER derives from TYPE_PCI_DEVICE, has
 * Rocker as its instance structure and implements the conventional PCI
 * device interface.
 */
static const TypeInfo rocker_info = {
    .name          = TYPE_ROCKER,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(Rocker),
    .class_init    = rocker_class_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};
1539
/* Register the rocker device type described by rocker_info. */
static void rocker_register_types(void)
{
    type_register_static(&rocker_info);
}

/* Hook the registration function in through type_init(). */
type_init(rocker_register_types)
1546