qemu/hw/net/rocker/rocker.c
<<
>>
Prefs
   1/*
   2 * QEMU rocker switch emulation - PCI device
   3 *
   4 * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
   5 * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License as published by
   9 * the Free Software Foundation; either version 2 of the License, or
  10 * (at your option) any later version.
  11 *
  12 * This program is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 * GNU General Public License for more details.
  16 */
  17
  18#include "qemu/osdep.h"
  19#include "hw/hw.h"
  20#include "hw/pci/pci.h"
  21#include "hw/pci/msix.h"
  22#include "net/net.h"
  23#include "net/eth.h"
  24#include "qapi/error.h"
  25#include "qapi/qapi-commands-rocker.h"
  26#include "qemu/iov.h"
  27#include "qemu/bitops.h"
  28
  29#include "rocker.h"
  30#include "rocker_hw.h"
  31#include "rocker_fp.h"
  32#include "rocker_desc.h"
  33#include "rocker_tlv.h"
  34#include "rocker_world.h"
  35#include "rocker_of_dpa.h"
  36
  37struct rocker {
  38    /* private */
  39    PCIDevice parent_obj;
  40    /* public */
  41
  42    MemoryRegion mmio;
  43    MemoryRegion msix_bar;
  44
  45    /* switch configuration */
  46    char *name;                  /* switch name */
  47    char *world_name;            /* world name */
  48    uint32_t fp_ports;           /* front-panel port count */
  49    NICPeers *fp_ports_peers;
  50    MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
  51    uint64_t switch_id;          /* switch id */
  52
  53    /* front-panel ports */
  54    FpPort *fp_port[ROCKER_FP_PORTS_MAX];
  55
  56    /* register backings */
  57    uint32_t test_reg;
  58    uint64_t test_reg64;
  59    dma_addr_t test_dma_addr;
  60    uint32_t test_dma_size;
  61    uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */
  62
  63    /* desc rings */
  64    DescRing **rings;
  65
  66    /* switch worlds */
  67    World *worlds[ROCKER_WORLD_TYPE_MAX];
  68    World *world_dflt;
  69
  70    QLIST_ENTRY(rocker) next;
  71};
  72
  73#define TYPE_ROCKER "rocker"
  74
  75#define ROCKER(obj) \
  76    OBJECT_CHECK(Rocker, (obj), TYPE_ROCKER)
  77
  78static QLIST_HEAD(, rocker) rockers;
  79
  80Rocker *rocker_find(const char *name)
  81{
  82    Rocker *r;
  83
  84    QLIST_FOREACH(r, &rockers, next)
  85        if (strcmp(r->name, name) == 0) {
  86            return r;
  87        }
  88
  89    return NULL;
  90}
  91
  92World *rocker_get_world(Rocker *r, enum rocker_world_type type)
  93{
  94    if (type < ROCKER_WORLD_TYPE_MAX) {
  95        return r->worlds[type];
  96    }
  97    return NULL;
  98}
  99
 100RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
 101{
 102    RockerSwitch *rocker;
 103    Rocker *r;
 104
 105    r = rocker_find(name);
 106    if (!r) {
 107        error_setg(errp, "rocker %s not found", name);
 108        return NULL;
 109    }
 110
 111    rocker = g_new0(RockerSwitch, 1);
 112    rocker->name = g_strdup(r->name);
 113    rocker->id = r->switch_id;
 114    rocker->ports = r->fp_ports;
 115
 116    return rocker;
 117}
 118
 119RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
 120{
 121    RockerPortList *list = NULL;
 122    Rocker *r;
 123    int i;
 124
 125    r = rocker_find(name);
 126    if (!r) {
 127        error_setg(errp, "rocker %s not found", name);
 128        return NULL;
 129    }
 130
 131    for (i = r->fp_ports - 1; i >= 0; i--) {
 132        RockerPortList *info = g_malloc0(sizeof(*info));
 133        info->value = g_malloc0(sizeof(*info->value));
 134        struct fp_port *port = r->fp_port[i];
 135
 136        fp_port_get_info(port, info);
 137        info->next = list;
 138        list = info;
 139    }
 140
 141    return list;
 142}
 143
 144uint32_t rocker_fp_ports(Rocker *r)
 145{
 146    return r->fp_ports;
 147}
 148
 149static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
 150                                            DescRing *ring)
 151{
 152    return (desc_ring_index(ring) - 2) / 2 + 1;
 153}
 154
 155static int tx_consume(Rocker *r, DescInfo *info)
 156{
 157    PCIDevice *dev = PCI_DEVICE(r);
 158    char *buf = desc_get_buf(info, true);
 159    RockerTlv *tlv_frag;
 160    RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
 161    struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
 162    uint32_t pport;
 163    uint32_t port;
 164    uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
 165    uint16_t tx_l3_csum_off = 0;
 166    uint16_t tx_tso_mss = 0;
 167    uint16_t tx_tso_hdr_len = 0;
 168    int iovcnt = 0;
 169    int err = ROCKER_OK;
 170    int rem;
 171    int i;
 172
 173    if (!buf) {
 174        return -ROCKER_ENXIO;
 175    }
 176
 177    rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
 178
 179    if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
 180        return -ROCKER_EINVAL;
 181    }
 182
 183    pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
 184    if (!fp_port_from_pport(pport, &port)) {
 185        return -ROCKER_EINVAL;
 186    }
 187
 188    if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
 189        tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
 190    }
 191
 192    switch (tx_offload) {
 193    case ROCKER_TX_OFFLOAD_L3_CSUM:
 194        if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
 195            return -ROCKER_EINVAL;
 196        }
 197        break;
 198    case ROCKER_TX_OFFLOAD_TSO:
 199        if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
 200            !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
 201            return -ROCKER_EINVAL;
 202        }
 203        break;
 204    }
 205
 206    if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
 207        tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
 208    }
 209
 210    if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
 211        tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
 212    }
 213
 214    if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
 215        tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
 216    }
 217
 218    rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
 219        hwaddr frag_addr;
 220        uint16_t frag_len;
 221
 222        if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
 223            err = -ROCKER_EINVAL;
 224            goto err_bad_attr;
 225        }
 226
 227        rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
 228
 229        if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
 230            !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
 231            err = -ROCKER_EINVAL;
 232            goto err_bad_attr;
 233        }
 234
 235        frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
 236        frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
 237
 238        if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
 239            goto err_too_many_frags;
 240        }
 241        iov[iovcnt].iov_len = frag_len;
 242        iov[iovcnt].iov_base = g_malloc(frag_len);
 243
 244        pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
 245                     iov[iovcnt].iov_len);
 246
 247        iovcnt++;
 248    }
 249
 250    if (iovcnt) {
 251        /* XXX perform Tx offloads */
 252        /* XXX   silence compiler for now */
 253        tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
 254    }
 255
 256    err = fp_port_eg(r->fp_port[port], iov, iovcnt);
 257
 258err_too_many_frags:
 259err_bad_attr:
 260    for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
 261        g_free(iov[i].iov_base);
 262    }
 263
 264    return err;
 265}
 266
 267static int cmd_get_port_settings(Rocker *r,
 268                                 DescInfo *info, char *buf,
 269                                 RockerTlv *cmd_info_tlv)
 270{
 271    RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
 272    RockerTlv *nest;
 273    FpPort *fp_port;
 274    uint32_t pport;
 275    uint32_t port;
 276    uint32_t speed;
 277    uint8_t duplex;
 278    uint8_t autoneg;
 279    uint8_t learning;
 280    char *phys_name;
 281    MACAddr macaddr;
 282    enum rocker_world_type mode;
 283    size_t tlv_size;
 284    int pos;
 285    int err;
 286
 287    rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
 288                            cmd_info_tlv);
 289
 290    if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
 291        return -ROCKER_EINVAL;
 292    }
 293
 294    pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
 295    if (!fp_port_from_pport(pport, &port)) {
 296        return -ROCKER_EINVAL;
 297    }
 298    fp_port = r->fp_port[port];
 299
 300    err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
 301    if (err) {
 302        return err;
 303    }
 304
 305    fp_port_get_macaddr(fp_port, &macaddr);
 306    mode = world_type(fp_port_get_world(fp_port));
 307    learning = fp_port_get_learning(fp_port);
 308    phys_name = fp_port_get_name(fp_port);
 309
 310    tlv_size = rocker_tlv_total_size(0) +                 /* nest */
 311               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 312               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
 313               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
 314               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
 315               rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
 316               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
 317               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
 318               rocker_tlv_total_size(strlen(phys_name));
 319
 320    if (tlv_size > desc_buf_size(info)) {
 321        return -ROCKER_EMSGSIZE;
 322    }
 323
 324    pos = 0;
 325    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
 326    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
 327    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
 328    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
 329    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
 330    rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
 331                   sizeof(macaddr.a), macaddr.a);
 332    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
 333    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
 334                      learning);
 335    rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
 336                   strlen(phys_name), phys_name);
 337    rocker_tlv_nest_end(buf, &pos, nest);
 338
 339    return desc_set_buf(info, tlv_size);
 340}
 341
 342static int cmd_set_port_settings(Rocker *r,
 343                                 RockerTlv *cmd_info_tlv)
 344{
 345    RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
 346    FpPort *fp_port;
 347    uint32_t pport;
 348    uint32_t port;
 349    uint32_t speed;
 350    uint8_t duplex;
 351    uint8_t autoneg;
 352    uint8_t learning;
 353    MACAddr macaddr;
 354    enum rocker_world_type mode;
 355    int err;
 356
 357    rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
 358                            cmd_info_tlv);
 359
 360    if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
 361        return -ROCKER_EINVAL;
 362    }
 363
 364    pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
 365    if (!fp_port_from_pport(pport, &port)) {
 366        return -ROCKER_EINVAL;
 367    }
 368    fp_port = r->fp_port[port];
 369
 370    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
 371        tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
 372        tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
 373
 374        speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
 375        duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
 376        autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
 377
 378        err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
 379        if (err) {
 380            return err;
 381        }
 382    }
 383
 384    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
 385        if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
 386            sizeof(macaddr.a)) {
 387            return -ROCKER_EINVAL;
 388        }
 389        memcpy(macaddr.a,
 390               rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
 391               sizeof(macaddr.a));
 392        fp_port_set_macaddr(fp_port, &macaddr);
 393    }
 394
 395    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
 396        mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
 397        if (mode >= ROCKER_WORLD_TYPE_MAX) {
 398            return -ROCKER_EINVAL;
 399        }
 400        /* We don't support world change. */
 401        if (!fp_port_check_world(fp_port, r->worlds[mode])) {
 402            return -ROCKER_EINVAL;
 403        }
 404    }
 405
 406    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
 407        learning =
 408            rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
 409        fp_port_set_learning(fp_port, learning);
 410    }
 411
 412    return ROCKER_OK;
 413}
 414
 415static int cmd_consume(Rocker *r, DescInfo *info)
 416{
 417    char *buf = desc_get_buf(info, false);
 418    RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
 419    RockerTlv *info_tlv;
 420    World *world;
 421    uint16_t cmd;
 422    int err;
 423
 424    if (!buf) {
 425        return -ROCKER_ENXIO;
 426    }
 427
 428    rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
 429
 430    if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
 431        return -ROCKER_EINVAL;
 432    }
 433
 434    cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
 435    info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
 436
 437    /* This might be reworked to something like this:
 438     * Every world will have an array of command handlers from
 439     * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is
 440     * up to each world to implement whatever command it want.
 441     * It can reference "generic" commands as cmd_set_port_settings or
 442     * cmd_get_port_settings
 443     */
 444
 445    switch (cmd) {
 446    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
 447    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
 448    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
 449    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
 450    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
 451    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
 452    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
 453    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
 454        world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
 455        err = world_do_cmd(world, info, buf, cmd, info_tlv);
 456        break;
 457    case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
 458        err = cmd_get_port_settings(r, info, buf, info_tlv);
 459        break;
 460    case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
 461        err = cmd_set_port_settings(r, info_tlv);
 462        break;
 463    default:
 464        err = -ROCKER_EINVAL;
 465        break;
 466    }
 467
 468    return err;
 469}
 470
 471static void rocker_msix_irq(Rocker *r, unsigned vector)
 472{
 473    PCIDevice *dev = PCI_DEVICE(r);
 474
 475    DPRINTF("MSI-X notify request for vector %d\n", vector);
 476    if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
 477        DPRINTF("incorrect vector %d\n", vector);
 478        return;
 479    }
 480    msix_notify(dev, vector);
 481}
 482
 483int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
 484{
 485    DescRing *ring = r->rings[ROCKER_RING_EVENT];
 486    DescInfo *info = desc_ring_fetch_desc(ring);
 487    RockerTlv *nest;
 488    char *buf;
 489    size_t tlv_size;
 490    int pos;
 491    int err;
 492
 493    if (!info) {
 494        return -ROCKER_ENOBUFS;
 495    }
 496
 497    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
 498               rocker_tlv_total_size(0) +                 /* nest */
 499               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 500               rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
 501
 502    if (tlv_size > desc_buf_size(info)) {
 503        err = -ROCKER_EMSGSIZE;
 504        goto err_too_big;
 505    }
 506
 507    buf = desc_get_buf(info, false);
 508    if (!buf) {
 509        err = -ROCKER_ENOMEM;
 510        goto err_no_mem;
 511    }
 512
 513    pos = 0;
 514    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
 515                        ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
 516    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
 517    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
 518    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
 519                      link_up ? 1 : 0);
 520    rocker_tlv_nest_end(buf, &pos, nest);
 521
 522    err = desc_set_buf(info, tlv_size);
 523
 524err_too_big:
 525err_no_mem:
 526    if (desc_ring_post_desc(ring, err)) {
 527        rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
 528    }
 529
 530    return err;
 531}
 532
 533int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
 534                               uint16_t vlan_id)
 535{
 536    DescRing *ring = r->rings[ROCKER_RING_EVENT];
 537    DescInfo *info;
 538    FpPort *fp_port;
 539    uint32_t port;
 540    RockerTlv *nest;
 541    char *buf;
 542    size_t tlv_size;
 543    int pos;
 544    int err;
 545
 546    if (!fp_port_from_pport(pport, &port)) {
 547        return -ROCKER_EINVAL;
 548    }
 549    fp_port = r->fp_port[port];
 550    if (!fp_port_get_learning(fp_port)) {
 551        return ROCKER_OK;
 552    }
 553
 554    info = desc_ring_fetch_desc(ring);
 555    if (!info) {
 556        return -ROCKER_ENOBUFS;
 557    }
 558
 559    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
 560               rocker_tlv_total_size(0) +                 /* nest */
 561               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
 562               rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
 563               rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
 564
 565    if (tlv_size > desc_buf_size(info)) {
 566        err = -ROCKER_EMSGSIZE;
 567        goto err_too_big;
 568    }
 569
 570    buf = desc_get_buf(info, false);
 571    if (!buf) {
 572        err = -ROCKER_ENOMEM;
 573        goto err_no_mem;
 574    }
 575
 576    pos = 0;
 577    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
 578                        ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
 579    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
 580    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
 581    rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
 582    rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
 583    rocker_tlv_nest_end(buf, &pos, nest);
 584
 585    err = desc_set_buf(info, tlv_size);
 586
 587err_too_big:
 588err_no_mem:
 589    if (desc_ring_post_desc(ring, err)) {
 590        rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
 591    }
 592
 593    return err;
 594}
 595
 596static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
 597                                                     uint32_t pport)
 598{
 599    return r->rings[(pport - 1) * 2 + 3];
 600}
 601
 602int rx_produce(World *world, uint32_t pport,
 603               const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
 604{
 605    Rocker *r = world_rocker(world);
 606    PCIDevice *dev = (PCIDevice *)r;
 607    DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
 608    DescInfo *info = desc_ring_fetch_desc(ring);
 609    char *data;
 610    size_t data_size = iov_size(iov, iovcnt);
 611    char *buf;
 612    uint16_t rx_flags = 0;
 613    uint16_t rx_csum = 0;
 614    size_t tlv_size;
 615    RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
 616    hwaddr frag_addr;
 617    uint16_t frag_max_len;
 618    int pos;
 619    int err;
 620
 621    if (!info) {
 622        return -ROCKER_ENOBUFS;
 623    }
 624
 625    buf = desc_get_buf(info, false);
 626    if (!buf) {
 627        err = -ROCKER_ENXIO;
 628        goto out;
 629    }
 630    rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
 631
 632    if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
 633        !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
 634        err = -ROCKER_EINVAL;
 635        goto out;
 636    }
 637
 638    frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
 639    frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
 640
 641    if (data_size > frag_max_len) {
 642        err = -ROCKER_EMSGSIZE;
 643        goto out;
 644    }
 645
 646    if (copy_to_cpu) {
 647        rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
 648    }
 649
 650    /* XXX calc rx flags/csum */
 651
 652    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
 653               rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */
 654               rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
 655               rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
 656               rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
 657
 658    if (tlv_size > desc_buf_size(info)) {
 659        err = -ROCKER_EMSGSIZE;
 660        goto out;
 661    }
 662
 663    /* TODO:
 664     * iov dma write can be optimized in similar way e1000 does it in
 665     * e1000_receive_iov. But maybe if would make sense to introduce
 666     * generic helper iov_dma_write.
 667     */
 668
 669    data = g_malloc(data_size);
 670
 671    iov_to_buf(iov, iovcnt, 0, data, data_size);
 672    pci_dma_write(dev, frag_addr, data, data_size);
 673    g_free(data);
 674
 675    pos = 0;
 676    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
 677    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
 678    rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
 679    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
 680    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
 681
 682    err = desc_set_buf(info, tlv_size);
 683
 684out:
 685    if (desc_ring_post_desc(ring, err)) {
 686        rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
 687    }
 688
 689    return err;
 690}
 691
 692int rocker_port_eg(Rocker *r, uint32_t pport,
 693                   const struct iovec *iov, int iovcnt)
 694{
 695    FpPort *fp_port;
 696    uint32_t port;
 697
 698    if (!fp_port_from_pport(pport, &port)) {
 699        return -ROCKER_EINVAL;
 700    }
 701
 702    fp_port = r->fp_port[port];
 703
 704    return fp_port_eg(fp_port, iov, iovcnt);
 705}
 706
 707static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
 708{
 709    PCIDevice *dev = PCI_DEVICE(r);
 710    char *buf;
 711    int i;
 712
 713    buf = g_malloc(r->test_dma_size);
 714
 715    switch (val) {
 716    case ROCKER_TEST_DMA_CTRL_CLEAR:
 717        memset(buf, 0, r->test_dma_size);
 718        break;
 719    case ROCKER_TEST_DMA_CTRL_FILL:
 720        memset(buf, 0x96, r->test_dma_size);
 721        break;
 722    case ROCKER_TEST_DMA_CTRL_INVERT:
 723        pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
 724        for (i = 0; i < r->test_dma_size; i++) {
 725            buf[i] = ~buf[i];
 726        }
 727        break;
 728    default:
 729        DPRINTF("not test dma control val=0x%08x\n", val);
 730        goto err_out;
 731    }
 732    pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
 733
 734    rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
 735
 736err_out:
 737    g_free(buf);
 738}
 739
 740static void rocker_reset(DeviceState *dev);
 741
 742static void rocker_control(Rocker *r, uint32_t val)
 743{
 744    if (val & ROCKER_CONTROL_RESET) {
 745        rocker_reset(DEVICE(r));
 746    }
 747}
 748
 749static int rocker_pci_ring_count(Rocker *r)
 750{
 751    /* There are:
 752     * - command ring
 753     * - event ring
 754     * - tx and rx ring per each port
 755     */
 756    return 2 + (2 * r->fp_ports);
 757}
 758
 759static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
 760{
 761    hwaddr start = ROCKER_DMA_DESC_BASE;
 762    hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
 763
 764    return addr >= start && addr < end;
 765}
 766
 767static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
 768{
 769    int i;
 770    bool old_enabled;
 771    bool new_enabled;
 772    FpPort *fp_port;
 773
 774    for (i = 0; i < r->fp_ports; i++) {
 775        fp_port = r->fp_port[i];
 776        old_enabled = fp_port_enabled(fp_port);
 777        new_enabled = (new >> (i + 1)) & 0x1;
 778        if (new_enabled == old_enabled) {
 779            continue;
 780        }
 781        if (new_enabled) {
 782            fp_port_enable(r->fp_port[i]);
 783        } else {
 784            fp_port_disable(r->fp_port[i]);
 785        }
 786    }
 787}
 788
 789static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
 790{
 791    Rocker *r = opaque;
 792
 793    if (rocker_addr_is_desc_reg(r, addr)) {
 794        unsigned index = ROCKER_RING_INDEX(addr);
 795        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 796
 797        switch (offset) {
 798        case ROCKER_DMA_DESC_ADDR_OFFSET:
 799            r->lower32 = (uint64_t)val;
 800            break;
 801        case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
 802            desc_ring_set_base_addr(r->rings[index],
 803                                    ((uint64_t)val) << 32 | r->lower32);
 804            r->lower32 = 0;
 805            break;
 806        case ROCKER_DMA_DESC_SIZE_OFFSET:
 807            desc_ring_set_size(r->rings[index], val);
 808            break;
 809        case ROCKER_DMA_DESC_HEAD_OFFSET:
 810            if (desc_ring_set_head(r->rings[index], val)) {
 811                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
 812            }
 813            break;
 814        case ROCKER_DMA_DESC_CTRL_OFFSET:
 815            desc_ring_set_ctrl(r->rings[index], val);
 816            break;
 817        case ROCKER_DMA_DESC_CREDITS_OFFSET:
 818            if (desc_ring_ret_credits(r->rings[index], val)) {
 819                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
 820            }
 821            break;
 822        default:
 823            DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
 824                    " val=0x%08x (ring %d, addr=0x%02x)\n",
 825                    addr, val, index, offset);
 826            break;
 827        }
 828        return;
 829    }
 830
 831    switch (addr) {
 832    case ROCKER_TEST_REG:
 833        r->test_reg = val;
 834        break;
 835    case ROCKER_TEST_REG64:
 836    case ROCKER_TEST_DMA_ADDR:
 837    case ROCKER_PORT_PHYS_ENABLE:
 838        r->lower32 = (uint64_t)val;
 839        break;
 840    case ROCKER_TEST_REG64 + 4:
 841        r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
 842        r->lower32 = 0;
 843        break;
 844    case ROCKER_TEST_IRQ:
 845        rocker_msix_irq(r, val);
 846        break;
 847    case ROCKER_TEST_DMA_SIZE:
 848        r->test_dma_size = val & 0xFFFF;
 849        break;
 850    case ROCKER_TEST_DMA_ADDR + 4:
 851        r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
 852        r->lower32 = 0;
 853        break;
 854    case ROCKER_TEST_DMA_CTRL:
 855        rocker_test_dma_ctrl(r, val);
 856        break;
 857    case ROCKER_CONTROL:
 858        rocker_control(r, val);
 859        break;
 860    case ROCKER_PORT_PHYS_ENABLE + 4:
 861        rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
 862        r->lower32 = 0;
 863        break;
 864    default:
 865        DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
 866                " val=0x%08x\n", addr, val);
 867        break;
 868    }
 869}
 870
 871static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
 872{
 873    Rocker *r = opaque;
 874
 875    if (rocker_addr_is_desc_reg(r, addr)) {
 876        unsigned index = ROCKER_RING_INDEX(addr);
 877        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 878
 879        switch (offset) {
 880        case ROCKER_DMA_DESC_ADDR_OFFSET:
 881            desc_ring_set_base_addr(r->rings[index], val);
 882            break;
 883        default:
 884            DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
 885                    " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
 886                    addr, val, index, offset);
 887            break;
 888        }
 889        return;
 890    }
 891
 892    switch (addr) {
 893    case ROCKER_TEST_REG64:
 894        r->test_reg64 = val;
 895        break;
 896    case ROCKER_TEST_DMA_ADDR:
 897        r->test_dma_addr = val;
 898        break;
 899    case ROCKER_PORT_PHYS_ENABLE:
 900        rocker_port_phys_enable_write(r, val);
 901        break;
 902    default:
 903        DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
 904                " val=0x" TARGET_FMT_plx "\n", addr, val);
 905        break;
 906    }
 907}
 908
 909#ifdef DEBUG_ROCKER
 910#define regname(reg) case (reg): return #reg
 911static const char *rocker_reg_name(void *opaque, hwaddr addr)
 912{
 913    Rocker *r = opaque;
 914
 915    if (rocker_addr_is_desc_reg(r, addr)) {
 916        unsigned index = ROCKER_RING_INDEX(addr);
 917        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
 918        static char buf[100];
 919        char ring_name[10];
 920
 921        switch (index) {
 922        case 0:
 923            sprintf(ring_name, "cmd");
 924            break;
 925        case 1:
 926            sprintf(ring_name, "event");
 927            break;
 928        default:
 929            sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
 930                    (index - 2) / 2);
 931        }
 932
 933        switch (offset) {
 934        case ROCKER_DMA_DESC_ADDR_OFFSET:
 935            sprintf(buf, "Ring[%s] ADDR", ring_name);
 936            return buf;
 937        case ROCKER_DMA_DESC_ADDR_OFFSET+4:
 938            sprintf(buf, "Ring[%s] ADDR+4", ring_name);
 939            return buf;
 940        case ROCKER_DMA_DESC_SIZE_OFFSET:
 941            sprintf(buf, "Ring[%s] SIZE", ring_name);
 942            return buf;
 943        case ROCKER_DMA_DESC_HEAD_OFFSET:
 944            sprintf(buf, "Ring[%s] HEAD", ring_name);
 945            return buf;
 946        case ROCKER_DMA_DESC_TAIL_OFFSET:
 947            sprintf(buf, "Ring[%s] TAIL", ring_name);
 948            return buf;
 949        case ROCKER_DMA_DESC_CTRL_OFFSET:
 950            sprintf(buf, "Ring[%s] CTRL", ring_name);
 951            return buf;
 952        case ROCKER_DMA_DESC_CREDITS_OFFSET:
 953            sprintf(buf, "Ring[%s] CREDITS", ring_name);
 954            return buf;
 955        default:
 956            sprintf(buf, "Ring[%s] ???", ring_name);
 957            return buf;
 958        }
 959    } else {
 960        switch (addr) {
 961            regname(ROCKER_BOGUS_REG0);
 962            regname(ROCKER_BOGUS_REG1);
 963            regname(ROCKER_BOGUS_REG2);
 964            regname(ROCKER_BOGUS_REG3);
 965            regname(ROCKER_TEST_REG);
 966            regname(ROCKER_TEST_REG64);
 967            regname(ROCKER_TEST_REG64+4);
 968            regname(ROCKER_TEST_IRQ);
 969            regname(ROCKER_TEST_DMA_ADDR);
 970            regname(ROCKER_TEST_DMA_ADDR+4);
 971            regname(ROCKER_TEST_DMA_SIZE);
 972            regname(ROCKER_TEST_DMA_CTRL);
 973            regname(ROCKER_CONTROL);
 974            regname(ROCKER_PORT_PHYS_COUNT);
 975            regname(ROCKER_PORT_PHYS_LINK_STATUS);
 976            regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
 977            regname(ROCKER_PORT_PHYS_ENABLE);
 978            regname(ROCKER_PORT_PHYS_ENABLE+4);
 979            regname(ROCKER_SWITCH_ID);
 980            regname(ROCKER_SWITCH_ID+4);
 981        }
 982    }
 983    return "???";
 984}
 985#else
 986static const char *rocker_reg_name(void *opaque, hwaddr addr)
 987{
 988    return NULL;
 989}
 990#endif
 991
 992static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
 993                              unsigned size)
 994{
 995    DPRINTF("Write %s addr " TARGET_FMT_plx
 996            ", size %u, val " TARGET_FMT_plx "\n",
 997            rocker_reg_name(opaque, addr), addr, size, val);
 998
 999    switch (size) {
1000    case 4:
1001        rocker_io_writel(opaque, addr, val);
1002        break;
1003    case 8:
1004        rocker_io_writeq(opaque, addr, val);
1005        break;
1006    }
1007}
1008
1009static uint64_t rocker_port_phys_link_status(Rocker *r)
1010{
1011    int i;
1012    uint64_t status = 0;
1013
1014    for (i = 0; i < r->fp_ports; i++) {
1015        FpPort *port = r->fp_port[i];
1016
1017        if (fp_port_get_link_up(port)) {
1018            status |= 1 << (i + 1);
1019        }
1020    }
1021    return status;
1022}
1023
1024static uint64_t rocker_port_phys_enable_read(Rocker *r)
1025{
1026    int i;
1027    uint64_t ret = 0;
1028
1029    for (i = 0; i < r->fp_ports; i++) {
1030        FpPort *port = r->fp_port[i];
1031
1032        if (fp_port_enabled(port)) {
1033            ret |= 1 << (i + 1);
1034        }
1035    }
1036    return ret;
1037}
1038
1039static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1040{
1041    Rocker *r = opaque;
1042    uint32_t ret;
1043
1044    if (rocker_addr_is_desc_reg(r, addr)) {
1045        unsigned index = ROCKER_RING_INDEX(addr);
1046        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1047
1048        switch (offset) {
1049        case ROCKER_DMA_DESC_ADDR_OFFSET:
1050            ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1051            break;
1052        case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1053            ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1054            break;
1055        case ROCKER_DMA_DESC_SIZE_OFFSET:
1056            ret = desc_ring_get_size(r->rings[index]);
1057            break;
1058        case ROCKER_DMA_DESC_HEAD_OFFSET:
1059            ret = desc_ring_get_head(r->rings[index]);
1060            break;
1061        case ROCKER_DMA_DESC_TAIL_OFFSET:
1062            ret = desc_ring_get_tail(r->rings[index]);
1063            break;
1064        case ROCKER_DMA_DESC_CREDITS_OFFSET:
1065            ret = desc_ring_get_credits(r->rings[index]);
1066            break;
1067        default:
1068            DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
1069                    " (ring %d, addr=0x%02x)\n", addr, index, offset);
1070            ret = 0;
1071            break;
1072        }
1073        return ret;
1074    }
1075
1076    switch (addr) {
1077    case ROCKER_BOGUS_REG0:
1078    case ROCKER_BOGUS_REG1:
1079    case ROCKER_BOGUS_REG2:
1080    case ROCKER_BOGUS_REG3:
1081        ret = 0xDEADBABE;
1082        break;
1083    case ROCKER_TEST_REG:
1084        ret = r->test_reg * 2;
1085        break;
1086    case ROCKER_TEST_REG64:
1087        ret = (uint32_t)(r->test_reg64 * 2);
1088        break;
1089    case ROCKER_TEST_REG64 + 4:
1090        ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1091        break;
1092    case ROCKER_TEST_DMA_SIZE:
1093        ret = r->test_dma_size;
1094        break;
1095    case ROCKER_TEST_DMA_ADDR:
1096        ret = (uint32_t)r->test_dma_addr;
1097        break;
1098    case ROCKER_TEST_DMA_ADDR + 4:
1099        ret = (uint32_t)(r->test_dma_addr >> 32);
1100        break;
1101    case ROCKER_PORT_PHYS_COUNT:
1102        ret = r->fp_ports;
1103        break;
1104    case ROCKER_PORT_PHYS_LINK_STATUS:
1105        ret = (uint32_t)rocker_port_phys_link_status(r);
1106        break;
1107    case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1108        ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1109        break;
1110    case ROCKER_PORT_PHYS_ENABLE:
1111        ret = (uint32_t)rocker_port_phys_enable_read(r);
1112        break;
1113    case ROCKER_PORT_PHYS_ENABLE + 4:
1114        ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1115        break;
1116    case ROCKER_SWITCH_ID:
1117        ret = (uint32_t)r->switch_id;
1118        break;
1119    case ROCKER_SWITCH_ID + 4:
1120        ret = (uint32_t)(r->switch_id >> 32);
1121        break;
1122    default:
1123        DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1124        ret = 0;
1125        break;
1126    }
1127    return ret;
1128}
1129
1130static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1131{
1132    Rocker *r = opaque;
1133    uint64_t ret;
1134
1135    if (rocker_addr_is_desc_reg(r, addr)) {
1136        unsigned index = ROCKER_RING_INDEX(addr);
1137        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1138
1139        switch (addr & ROCKER_DMA_DESC_MASK) {
1140        case ROCKER_DMA_DESC_ADDR_OFFSET:
1141            ret = desc_ring_get_base_addr(r->rings[index]);
1142            break;
1143        default:
1144            DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1145                    " (ring %d, addr=0x%02x)\n", addr, index, offset);
1146            ret = 0;
1147            break;
1148        }
1149        return ret;
1150    }
1151
1152    switch (addr) {
1153    case ROCKER_BOGUS_REG0:
1154    case ROCKER_BOGUS_REG2:
1155        ret = 0xDEADBABEDEADBABEULL;
1156        break;
1157    case ROCKER_TEST_REG64:
1158        ret = r->test_reg64 * 2;
1159        break;
1160    case ROCKER_TEST_DMA_ADDR:
1161        ret = r->test_dma_addr;
1162        break;
1163    case ROCKER_PORT_PHYS_LINK_STATUS:
1164        ret = rocker_port_phys_link_status(r);
1165        break;
1166    case ROCKER_PORT_PHYS_ENABLE:
1167        ret = rocker_port_phys_enable_read(r);
1168        break;
1169    case ROCKER_SWITCH_ID:
1170        ret = r->switch_id;
1171        break;
1172    default:
1173        DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1174        ret = 0;
1175        break;
1176    }
1177    return ret;
1178}
1179
1180static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1181{
1182    DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1183            rocker_reg_name(opaque, addr), addr, size);
1184
1185    switch (size) {
1186    case 4:
1187        return rocker_io_readl(opaque, addr);
1188    case 8:
1189        return rocker_io_readq(opaque, addr);
1190    }
1191
1192    return -1;
1193}
1194
/*
 * Access callbacks and size constraints for the register region that
 * pci_rocker_realize() creates with memory_region_init_io().
 *
 * Registers are little-endian.  Both the accepted (.valid) and the
 * implemented (.impl) access widths are limited to 4..8 bytes, which
 * matches the 4- and 8-byte cases handled by rocker_mmio_read() and
 * rocker_mmio_write().
 */
static const MemoryRegionOps rocker_mmio_ops = {
    .read = rocker_mmio_read,
    .write = rocker_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    },
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
    },
};
1208
1209static void rocker_msix_vectors_unuse(Rocker *r,
1210                                      unsigned int num_vectors)
1211{
1212    PCIDevice *dev = PCI_DEVICE(r);
1213    int i;
1214
1215    for (i = 0; i < num_vectors; i++) {
1216        msix_vector_unuse(dev, i);
1217    }
1218}
1219
1220static int rocker_msix_vectors_use(Rocker *r,
1221                                   unsigned int num_vectors)
1222{
1223    PCIDevice *dev = PCI_DEVICE(r);
1224    int err;
1225    int i;
1226
1227    for (i = 0; i < num_vectors; i++) {
1228        err = msix_vector_use(dev, i);
1229        if (err) {
1230            goto rollback;
1231        }
1232    }
1233    return 0;
1234
1235rollback:
1236    rocker_msix_vectors_unuse(r, i);
1237    return err;
1238}
1239
1240static int rocker_msix_init(Rocker *r, Error **errp)
1241{
1242    PCIDevice *dev = PCI_DEVICE(r);
1243    int err;
1244
1245    err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1246                    &r->msix_bar,
1247                    ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1248                    &r->msix_bar,
1249                    ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1250                    0, errp);
1251    if (err) {
1252        return err;
1253    }
1254
1255    err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1256    if (err) {
1257        goto err_msix_vectors_use;
1258    }
1259
1260    return 0;
1261
1262err_msix_vectors_use:
1263    msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1264    return err;
1265}
1266
1267static void rocker_msix_uninit(Rocker *r)
1268{
1269    PCIDevice *dev = PCI_DEVICE(r);
1270
1271    msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1272    rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1273}
1274
1275static World *rocker_world_type_by_name(Rocker *r, const char *name)
1276{
1277    int i;
1278
1279    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1280        if (strcmp(name, world_name(r->worlds[i])) == 0) {
1281            return r->worlds[i];
1282        }
1283    }
1284    return NULL;
1285}
1286
/*
 * Realize callback of the rocker PCI device.
 *
 * In order: allocates the OF-DPA world and resolves the default world,
 * registers the register BAR and the MSI-X BAR, initializes MSI-X,
 * validates/defaults the switch properties (name, start MAC address,
 * switch id, port count), allocates the descriptor rings and the
 * front-panel ports, and finally links the switch into the global
 * "rockers" list.
 *
 * On failure an error is stored in @errp and everything set up so far
 * is undone through the label chain at the end of the function.
 *
 * @dev:  the PCI device being realized
 * @errp: receives the error description on failure
 */
static void pci_rocker_realize(PCIDevice *dev, Error **errp)
{
    Rocker *r = ROCKER(dev);
    const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
    const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
    /* counts switches that received a default MAC, across all instances */
    static int sw_index;
    int i, err = 0;

    /* allocate worlds */

    r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);

    /* no "world" property given: fall back to the OF-DPA world's name */
    if (!r->world_name) {
        r->world_name = g_strdup(world_name(r->worlds[ROCKER_WORLD_TYPE_OF_DPA]));
    }

    r->world_dflt = rocker_world_type_by_name(r, r->world_name);
    if (!r->world_dflt) {
        error_setg(errp,
                "invalid argument requested world %s does not exist",
                r->world_name);
        goto err_world_type_by_name;
    }

    /* set up memory-mapped region at BAR0 */

    memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
                          "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
    pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
                     PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);

    /* set up memory-mapped region for MSI-X */

    memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
                       ROCKER_PCI_MSIX_BAR_SIZE);
    pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
                     PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);

    /* MSI-X init */

    err = rocker_msix_init(r, errp);
    if (err) {
        goto err_msix_init;
    }

    /* validate switch properties */

    /* no "name" property given: use the device type name */
    if (!r->name) {
        r->name = g_strdup(TYPE_ROCKER);
    }

    /* switch names must be unique among registered switches */
    if (rocker_find(r->name)) {
        error_setg(errp, "%s already exists", r->name);
        goto err_duplicate;
    }

    /* Rocker name is passed in port name requests to OS with the intention
     * that the name is used in interface names. Limit the length of the
     * rocker name to avoid naming problems in the OS. Also, adding the
     * port number as p# and unganged breakout b#, where # is at most 2
     * digits, so leave room for it too (-1 for string terminator, -3 for
     * p# and -3 for b#)
     */
#define ROCKER_IFNAMSIZ 16
#define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
    if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
        error_setg(errp,
                "name too long; please shorten to at most %d chars",
                MAX_ROCKER_NAME_LEN);
        goto err_name_too_long;
    }

    /*
     * An all-zero start MAC means "not configured": use the default
     * address and bump its fifth byte by the running switch index so
     * that each defaulted switch gets a different address.
     */
    if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
        memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
        r->fp_start_macaddr.a[4] += (sw_index++);
    }

    /*
     * No switch id configured: derive it by copying the raw start MAC
     * bytes into the beginning of the 64-bit switch_id storage.
     */
    if (!r->switch_id) {
        memcpy(&r->switch_id, &r->fp_start_macaddr,
               sizeof(r->fp_start_macaddr));
    }

    /*
     * Silently clamp the port count to the supported maximum.
     * NOTE(review): rocker_msix_init() above already sized the MSI-X
     * vectors from the unclamped r->fp_ports - confirm this ordering is
     * intended.
     */
    if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
        r->fp_ports = ROCKER_FP_PORTS_MAX;
    }

    r->rings = g_new(DescRing *, rocker_pci_ring_count(r));

    /* Rings are ordered like this:
     * - command ring
     * - event ring
     * - port0 tx ring
     * - port0 rx ring
     * - port1 tx ring
     * - port1 rx ring
     * .....
     */

    for (i = 0; i < rocker_pci_ring_count(r); i++) {
        DescRing *ring = desc_ring_alloc(r, i);

        /*
         * Attach the consume handler and MSI-X vector for this ring.
         * The event ring and the rx rings are given a NULL handler.
         */
        if (i == ROCKER_RING_CMD) {
            desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
        } else if (i == ROCKER_RING_EVENT) {
            desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
        } else if (i % 2 == 0) {
            /* even index >= 2: tx ring of port (i - 2) / 2 */
            desc_ring_set_consume(ring, tx_consume,
                                  ROCKER_MSIX_VEC_TX((i - 2) / 2));
        } else if (i % 2 == 1) {
            /* odd index >= 3: rx ring of port (i - 3) / 2 */
            desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
        }

        r->rings[i] = ring;
    }

    /* create the front-panel ports and attach them to the default world */
    for (i = 0; i < r->fp_ports; i++) {
        FpPort *port =
            fp_port_alloc(r, r->name, &r->fp_start_macaddr,
                          i, &r->fp_ports_peers[i]);

        r->fp_port[i] = port;
        fp_port_set_world(port, r->world_dflt);
    }

    /* make the switch discoverable through the global list */
    QLIST_INSERT_HEAD(&rockers, r, next);

    return;

    /* error unwinding: each label undoes the steps completed before it */
err_name_too_long:
err_duplicate:
    rocker_msix_uninit(r);
err_msix_init:
    object_unparent(OBJECT(&r->msix_bar));
    object_unparent(OBJECT(&r->mmio));
err_world_type_by_name:
    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
        if (r->worlds[i]) {
            world_free(r->worlds[i]);
        }
    }
}
1428
1429static void pci_rocker_uninit(PCIDevice *dev)
1430{
1431    Rocker *r = ROCKER(dev);
1432    int i;
1433
1434    QLIST_REMOVE(r, next);
1435
1436    for (i = 0; i < r->fp_ports; i++) {
1437        FpPort *port = r->fp_port[i];
1438
1439        fp_port_free(port);
1440        r->fp_port[i] = NULL;
1441    }
1442
1443    for (i = 0; i < rocker_pci_ring_count(r); i++) {
1444        if (r->rings[i]) {
1445            desc_ring_free(r->rings[i]);
1446        }
1447    }
1448    g_free(r->rings);
1449
1450    rocker_msix_uninit(r);
1451    object_unparent(OBJECT(&r->msix_bar));
1452    object_unparent(OBJECT(&r->mmio));
1453
1454    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1455        if (r->worlds[i]) {
1456            world_free(r->worlds[i]);
1457        }
1458    }
1459    g_free(r->fp_ports_peers);
1460}
1461
1462static void rocker_reset(DeviceState *dev)
1463{
1464    Rocker *r = ROCKER(dev);
1465    int i;
1466
1467    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1468        if (r->worlds[i]) {
1469            world_reset(r->worlds[i]);
1470        }
1471    }
1472    for (i = 0; i < r->fp_ports; i++) {
1473        fp_port_reset(r->fp_port[i]);
1474        fp_port_set_world(r->fp_port[i], r->world_dflt);
1475    }
1476
1477    r->test_reg = 0;
1478    r->test_reg64 = 0;
1479    r->test_dma_addr = 0;
1480    r->test_dma_size = 0;
1481
1482    for (i = 0; i < rocker_pci_ring_count(r); i++) {
1483        desc_ring_reset(r->rings[i]);
1484    }
1485
1486    DPRINTF("Reset done\n");
1487}
1488
/*
 * User-configurable device properties and the Rocker fields backing
 * them.  pci_rocker_realize() fills in defaults for "name", "world",
 * "fp_start_macaddr" and "switch_id" when they are left unset/zero.
 */
static Property rocker_properties[] = {
    /* switch name */
    DEFINE_PROP_STRING("name", Rocker, name),
    /* name of the default world */
    DEFINE_PROP_STRING("world", Rocker, world_name),
    /* MAC address of front-panel port 0 */
    DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
                        fp_start_macaddr),
    /* switch id; 0 (the default) means "derive it at realize time" */
    DEFINE_PROP_UINT64("switch_id", Rocker,
                       switch_id, 0),
    /*
     * Array of netdev peers for the front-panel ports; presumably the
     * element count is stored in fp_ports and the elements in
     * fp_ports_peers - confirm against DEFINE_PROP_ARRAY.
     */
    DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
                      fp_ports_peers, qdev_prop_netdev, NICPeers),
    DEFINE_PROP_END_OF_LIST(),
};
1500
/*
 * Migration description for the device.  It declares no fields and
 * flags the device as unmigratable.
 */
static const VMStateDescription rocker_vmsd = {
    .name = TYPE_ROCKER,
    .unmigratable = 1,
};
1505
1506static void rocker_class_init(ObjectClass *klass, void *data)
1507{
1508    DeviceClass *dc = DEVICE_CLASS(klass);
1509    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1510
1511    k->realize = pci_rocker_realize;
1512    k->exit = pci_rocker_uninit;
1513    k->vendor_id = PCI_VENDOR_ID_REDHAT;
1514    k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1515    k->revision = ROCKER_PCI_REVISION;
1516    k->class_id = PCI_CLASS_NETWORK_OTHER;
1517    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1518    dc->desc = "Rocker Switch";
1519    dc->reset = rocker_reset;
1520    dc->props = rocker_properties;
1521    dc->vmsd = &rocker_vmsd;
1522}
1523
/*
 * Type registration record: the rocker device is a PCI device whose
 * instances are sizeof(Rocker) bytes, set up by rocker_class_init(),
 * and which implements the conventional-PCI device interface.
 */
static const TypeInfo rocker_info = {
    .name          = TYPE_ROCKER,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(Rocker),
    .class_init    = rocker_class_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },    /* empty entry terminates the list */
    },
};
1534
/* Register the rocker device type described by rocker_info. */
static void rocker_register_types(void)
{
    type_register_static(&rocker_info);
}

/* Hook the registration function into type initialization. */
type_init(rocker_register_types)
1541