linux/drivers/net/ethernet/sun/sunvnet_common.c
/* sunvnet_common.c: Sun LDOM Virtual Network Driver.
 *
 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
 * Copyright (C) 2016-2017 Oracle. All rights reserved.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/mutex.h>
#include <linux/highmem.h>
#include <linux/if_vlan.h>
#define CREATE_TRACE_POINTS
#include <trace/events/sunvnet.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <linux/icmpv6.h>
#endif

#include <net/ip.h>
#include <net/icmp.h>
#include <net/route.h>

#include <asm/vio.h>
#include <asm/ldc.h>

#include "sunvnet_common.h"

/* Heuristic for the number of times to exponentially backoff and
 * retry sending an LDC trigger when EAGAIN is encountered
 */
#define VNET_MAX_RETRIES        10

MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
MODULE_DESCRIPTION("Sun LDOM virtual network support library");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.1");

static int __vnet_tx_trigger(struct vnet_port *port, u32 start);

static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr)
{
        return vio_dring_avail(dr, VNET_TX_RING_SIZE);
}
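
/* Usage sketch (illustrative, not part of the protocol): the TX path
 * checks the ring before claiming a descriptor, e.g.
 *
 *        if (unlikely(vnet_tx_dring_avail(dr) < 1))
 *                return NETDEV_TX_BUSY;
 *
 * which is the test done in sunvnet_start_xmit_common() below.
 */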

static int vnet_handle_unknown(struct vnet_port *port, void *arg)
{
        struct vio_msg_tag *pkt = arg;

        pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n",
               pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
        pr_err("Resetting connection\n");

        ldc_disconnect(port->vio.lp);

        return -ECONNRESET;
}

static int vnet_port_alloc_tx_ring(struct vnet_port *port);

int sunvnet_send_attr_common(struct vio_driver_state *vio)
{
        struct vnet_port *port = to_vnet_port(vio);
        struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
        struct vio_net_attr_info pkt;
        int framelen = ETH_FRAME_LEN;
        int i, err;

        err = vnet_port_alloc_tx_ring(to_vnet_port(vio));
        if (err)
                return err;

        memset(&pkt, 0, sizeof(pkt));
        pkt.tag.type = VIO_TYPE_CTRL;
        pkt.tag.stype = VIO_SUBTYPE_INFO;
        pkt.tag.stype_env = VIO_ATTR_INFO;
        pkt.tag.sid = vio_send_sid(vio);
        if (vio_version_before(vio, 1, 2))
                pkt.xfer_mode = VIO_DRING_MODE;
        else
                pkt.xfer_mode = VIO_NEW_DRING_MODE;
        pkt.addr_type = VNET_ADDR_ETHERMAC;
        pkt.ack_freq = 0;
        for (i = 0; i < 6; i++)
                pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8);
        if (vio_version_after(vio, 1, 3)) {
                if (port->rmtu) {
                        port->rmtu = min(VNET_MAXPACKET, port->rmtu);
                        pkt.mtu = port->rmtu;
                } else {
                        port->rmtu = VNET_MAXPACKET;
                        pkt.mtu = port->rmtu;
                }
                if (vio_version_after_eq(vio, 1, 6))
                        pkt.options = VIO_TX_DRING;
        } else if (vio_version_before(vio, 1, 3)) {
                pkt.mtu = framelen;
        } else { /* v1.3 */
                pkt.mtu = framelen + VLAN_HLEN;
        }

        pkt.cflags = 0;
        if (vio_version_after_eq(vio, 1, 7) && port->tso) {
                pkt.cflags |= VNET_LSO_IPV4_CAPAB;
                if (!port->tsolen)
                        port->tsolen = VNET_MAXTSO;
                pkt.ipv4_lso_maxlen = port->tsolen;
        }

        pkt.plnk_updt = PHYSLINK_UPDATE_NONE;

        viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
               "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
               "cflags[0x%04x] lso_max[%u]\n",
               pkt.xfer_mode, pkt.addr_type,
               (unsigned long long)pkt.addr,
               pkt.ack_freq, pkt.plnk_updt, pkt.options,
               (unsigned long long)pkt.mtu, pkt.cflags, pkt.ipv4_lso_maxlen);

        return vio_ldc_send(vio, &pkt, sizeof(pkt));
}
EXPORT_SYMBOL_GPL(sunvnet_send_attr_common);

static int handle_attr_info(struct vio_driver_state *vio,
                            struct vio_net_attr_info *pkt)
{
        struct vnet_port *port = to_vnet_port(vio);
        u64     localmtu;
        u8      xfer_mode;

        viodbg(HS, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
               "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
               " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
               pkt->xfer_mode, pkt->addr_type,
               (unsigned long long)pkt->addr,
               pkt->ack_freq, pkt->plnk_updt, pkt->options,
               (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
               pkt->ipv4_lso_maxlen);

        pkt->tag.sid = vio_send_sid(vio);

        xfer_mode = pkt->xfer_mode;
        /* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */
        if (vio_version_before(vio, 1, 2) && xfer_mode == VIO_DRING_MODE)
                xfer_mode = VIO_NEW_DRING_MODE;

        /* MTU negotiation:
         *      < v1.3 - ETH_FRAME_LEN exactly
         *      > v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change
         *                      pkt->mtu for ACK
         *      = v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly
         */
        if (vio_version_before(vio, 1, 3)) {
                localmtu = ETH_FRAME_LEN;
        } else if (vio_version_after(vio, 1, 3)) {
                localmtu = port->rmtu ? port->rmtu : VNET_MAXPACKET;
                localmtu = min(pkt->mtu, localmtu);
                pkt->mtu = localmtu;
        } else { /* v1.3 */
                localmtu = ETH_FRAME_LEN + VLAN_HLEN;
        }
        port->rmtu = localmtu;

        /* LSO negotiation */
        if (vio_version_after_eq(vio, 1, 7))
                port->tso &= !!(pkt->cflags & VNET_LSO_IPV4_CAPAB);
        else
                port->tso = false;
        if (port->tso) {
                if (!port->tsolen)
                        port->tsolen = VNET_MAXTSO;
                port->tsolen = min(port->tsolen, pkt->ipv4_lso_maxlen);
                if (port->tsolen < VNET_MINTSO) {
                        port->tso = false;
                        port->tsolen = 0;
                        pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
                }
                pkt->ipv4_lso_maxlen = port->tsolen;
        } else {
                pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
                pkt->ipv4_lso_maxlen = 0;
                port->tsolen = 0;
        }

        /* for version >= 1.6, ACK packet mode we support */
        if (vio_version_after_eq(vio, 1, 6)) {
                pkt->xfer_mode = VIO_NEW_DRING_MODE;
                pkt->options = VIO_TX_DRING;
        }

        if (!(xfer_mode & VIO_NEW_DRING_MODE) ||
            pkt->addr_type != VNET_ADDR_ETHERMAC ||
            pkt->mtu != localmtu) {
                viodbg(HS, "SEND NET ATTR NACK\n");

                pkt->tag.stype = VIO_SUBTYPE_NACK;

                (void)vio_ldc_send(vio, pkt, sizeof(*pkt));

                return -ECONNRESET;
        }

        viodbg(HS, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] "
               "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] "
               "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
               pkt->xfer_mode, pkt->addr_type,
               (unsigned long long)pkt->addr,
               pkt->ack_freq, pkt->plnk_updt, pkt->options,
               (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
               pkt->ipv4_lso_maxlen);

        pkt->tag.stype = VIO_SUBTYPE_ACK;

        return vio_ldc_send(vio, pkt, sizeof(*pkt));
}

static int handle_attr_ack(struct vio_driver_state *vio,
                           struct vio_net_attr_info *pkt)
{
        viodbg(HS, "GOT NET ATTR ACK\n");

        return 0;
}

static int handle_attr_nack(struct vio_driver_state *vio,
                            struct vio_net_attr_info *pkt)
{
        viodbg(HS, "GOT NET ATTR NACK\n");

        return -ECONNRESET;
}

int sunvnet_handle_attr_common(struct vio_driver_state *vio, void *arg)
{
        struct vio_net_attr_info *pkt = arg;

        switch (pkt->tag.stype) {
        case VIO_SUBTYPE_INFO:
                return handle_attr_info(vio, pkt);

        case VIO_SUBTYPE_ACK:
                return handle_attr_ack(vio, pkt);

        case VIO_SUBTYPE_NACK:
                return handle_attr_nack(vio, pkt);

        default:
                return -ECONNRESET;
        }
}
EXPORT_SYMBOL_GPL(sunvnet_handle_attr_common);

void sunvnet_handshake_complete_common(struct vio_driver_state *vio)
{
        struct vio_dring_state *dr;

        dr = &vio->drings[VIO_DRIVER_RX_RING];
        dr->rcv_nxt = 1;
        dr->snd_nxt = 1;

        dr = &vio->drings[VIO_DRIVER_TX_RING];
        dr->rcv_nxt = 1;
        dr->snd_nxt = 1;
}
EXPORT_SYMBOL_GPL(sunvnet_handshake_complete_common);

/* The hypervisor interface that implements copying to/from imported
 * memory from another domain requires that copies are done to 8-byte
 * aligned buffers, and that the lengths of such copies are also 8-byte
 * multiples.
 *
 * So we align skb->data to an 8-byte multiple and pad-out the data
 * area so we can round the copy length up to the next multiple of
 * 8 for the copy.
 *
 * The transmitter puts the actual start of the packet 6 bytes into
 * the buffer it sends over, so that the IP headers after the ethernet
 * header are aligned properly.  These 6 bytes are not in the descriptor
 * length, they are simply implied.  This offset is represented using
 * the VNET_PACKET_SKIP macro.
 */
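
/* Worked example (assuming VNET_PACKET_SKIP == 6, its value in
 * sunvnet_common.h): a 1514-byte frame starts 6 bytes into the buffer,
 * and the receive side below imports (1514 + 6 + 7) & ~7 = 1520 bytes,
 * an 8-byte multiple, before skb_pull()/skb_trim() restore the real
 * frame boundaries.
 */
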
static struct sk_buff *alloc_and_align_skb(struct net_device *dev,
                                           unsigned int len)
{
        struct sk_buff *skb;
        unsigned long addr, off;

        skb = netdev_alloc_skb(dev, len + VNET_PACKET_SKIP + 8 + 8);
        if (unlikely(!skb))
                return NULL;

        addr = (unsigned long)skb->data;
        off = ((addr + 7UL) & ~7UL) - addr;
        if (off)
                skb_reserve(skb, off);

        return skb;
}

static inline void vnet_fullcsum_ipv4(struct sk_buff *skb)
{
        struct iphdr *iph = ip_hdr(skb);
        int offset = skb_transport_offset(skb);

        if (skb->protocol != htons(ETH_P_IP))
                return;
        if (iph->protocol != IPPROTO_TCP &&
            iph->protocol != IPPROTO_UDP)
                return;
        skb->ip_summed = CHECKSUM_NONE;
        skb->csum_level = 1;
        skb->csum = 0;
        if (iph->protocol == IPPROTO_TCP) {
                struct tcphdr *ptcp = tcp_hdr(skb);

                ptcp->check = 0;
                skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
                ptcp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
                                                skb->len - offset, IPPROTO_TCP,
                                                skb->csum);
        } else if (iph->protocol == IPPROTO_UDP) {
                struct udphdr *pudp = udp_hdr(skb);

                pudp->check = 0;
                skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
                pudp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
                                                skb->len - offset, IPPROTO_UDP,
                                                skb->csum);
        }
}
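
/* Note: csum_tcpudp_magic() folds the IPv4 pseudo-header (saddr, daddr,
 * length, protocol) into the payload sum produced by skb_checksum();
 * the IPv6 variant below does the same via csum_ipv6_magic().
 */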

#if IS_ENABLED(CONFIG_IPV6)
static inline void vnet_fullcsum_ipv6(struct sk_buff *skb)
{
        struct ipv6hdr *ip6h = ipv6_hdr(skb);
        int offset = skb_transport_offset(skb);

        if (skb->protocol != htons(ETH_P_IPV6))
                return;
        if (ip6h->nexthdr != IPPROTO_TCP &&
            ip6h->nexthdr != IPPROTO_UDP)
                return;
        skb->ip_summed = CHECKSUM_NONE;
        skb->csum_level = 1;
        skb->csum = 0;
        if (ip6h->nexthdr == IPPROTO_TCP) {
                struct tcphdr *ptcp = tcp_hdr(skb);

                ptcp->check = 0;
                skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
                ptcp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
                                              skb->len - offset, IPPROTO_TCP,
                                              skb->csum);
        } else if (ip6h->nexthdr == IPPROTO_UDP) {
                struct udphdr *pudp = udp_hdr(skb);

                pudp->check = 0;
                skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
                pudp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
                                              skb->len - offset, IPPROTO_UDP,
                                              skb->csum);
        }
}
#endif

static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
{
        struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
        unsigned int len = desc->size;
        unsigned int copy_len;
        struct sk_buff *skb;
        int maxlen;
        int err;

        err = -EMSGSIZE;
        if (port->tso && port->tsolen > port->rmtu)
                maxlen = port->tsolen;
        else
                maxlen = port->rmtu;
        if (unlikely(len < ETH_ZLEN || len > maxlen)) {
                dev->stats.rx_length_errors++;
                goto out_dropped;
        }

        skb = alloc_and_align_skb(dev, len);
        err = -ENOMEM;
        if (unlikely(!skb)) {
                dev->stats.rx_missed_errors++;
                goto out_dropped;
        }

        copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U;
        skb_put(skb, copy_len);
        err = ldc_copy(port->vio.lp, LDC_COPY_IN,
                       skb->data, copy_len, 0,
                       desc->cookies, desc->ncookies);
        if (unlikely(err < 0)) {
                dev->stats.rx_frame_errors++;
                goto out_free_skb;
        }

        skb_pull(skb, VNET_PACKET_SKIP);
        skb_trim(skb, len);
        skb->protocol = eth_type_trans(skb, dev);

        if (vio_version_after_eq(&port->vio, 1, 8)) {
                struct vio_net_dext *dext = vio_net_ext(desc);

                skb_reset_network_header(skb);

                if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM) {
                        if (skb->protocol == htons(ETH_P_IP)) {
                                struct iphdr *iph = ip_hdr(skb);

                                iph->check = 0;
                                ip_send_check(iph);
                        }
                }
                if ((dext->flags & VNET_PKT_HCK_FULLCKSUM) &&
                    skb->ip_summed == CHECKSUM_NONE) {
                        if (skb->protocol == htons(ETH_P_IP)) {
                                struct iphdr *iph = ip_hdr(skb);
                                int ihl = iph->ihl * 4;

                                skb_set_transport_header(skb, ihl);
                                vnet_fullcsum_ipv4(skb);
#if IS_ENABLED(CONFIG_IPV6)
                        } else if (skb->protocol == htons(ETH_P_IPV6)) {
                                skb_set_transport_header(skb,
                                                         sizeof(struct ipv6hdr));
                                vnet_fullcsum_ipv6(skb);
#endif
                        }
                }
                if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) {
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
                        skb->csum_level = 0;
                        if (dext->flags & VNET_PKT_HCK_FULLCKSUM_OK)
                                skb->csum_level = 1;
                }
        }
        if (unlikely(is_multicast_ether_addr(eth_hdr(skb)->h_dest)))
                dev->stats.multicast++;
        dev->stats.rx_packets++;
        dev->stats.rx_bytes += len;
        port->stats.rx_packets++;
        port->stats.rx_bytes += len;
        napi_gro_receive(&port->napi, skb);
        return 0;

out_free_skb:
        kfree_skb(skb);

out_dropped:
        dev->stats.rx_dropped++;
        return err;
}

static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
                         u32 start, u32 end, u8 vio_dring_state)
{
        struct vio_dring_data hdr = {
                .tag = {
                        .type           = VIO_TYPE_DATA,
                        .stype          = VIO_SUBTYPE_ACK,
                        .stype_env      = VIO_DRING_DATA,
                        .sid            = vio_send_sid(&port->vio),
                },
                .dring_ident            = dr->ident,
                .start_idx              = start,
                .end_idx                = end,
                .state                  = vio_dring_state,
        };
        int err, delay;
        int retries = 0;

        hdr.seq = dr->snd_nxt;
        delay = 1;
        do {
                err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
                if (err > 0) {
                        dr->snd_nxt++;
                        break;
                }
                udelay(delay);
                if ((delay <<= 1) > 128)
                        delay = 128;
                if (retries++ > VNET_MAX_RETRIES) {
                        pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
                                port->raddr[0], port->raddr[1],
                                port->raddr[2], port->raddr[3],
                                port->raddr[4], port->raddr[5]);
                        break;
                }
        } while (err == -EAGAIN);

        if (err <= 0 && vio_dring_state == VIO_DRING_STOPPED) {
                port->stop_rx_idx = end;
                port->stop_rx = true;
        } else {
                port->stop_rx_idx = 0;
                port->stop_rx = false;
        }

        return err;
}
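
/* Backoff sketch: on -EAGAIN the udelay() doubles from 1us and is
 * capped at 128us, for at most VNET_MAX_RETRIES (10) retries -- well
 * under a millisecond of spinning before the peer is declared wedged.
 */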

static struct vio_net_desc *get_rx_desc(struct vnet_port *port,
                                        struct vio_dring_state *dr,
                                        u32 index)
{
        struct vio_net_desc *desc = port->vio.desc_buf;
        int err;

        err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size,
                                  (index * dr->entry_size),
                                  dr->cookies, dr->ncookies);
        if (err < 0)
                return ERR_PTR(err);

        return desc;
}

static int put_rx_desc(struct vnet_port *port,
                       struct vio_dring_state *dr,
                       struct vio_net_desc *desc,
                       u32 index)
{
        int err;

        err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size,
                                  (index * dr->entry_size),
                                  dr->cookies, dr->ncookies);
        if (err < 0)
                return err;

        return 0;
}
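
/* get_rx_desc()/put_rx_desc() copy one descriptor between the local
 * shadow buffer (port->vio.desc_buf) and the peer's exported ring,
 * addressed as index * dr->entry_size within the ring's LDC cookies.
 */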

static int vnet_walk_rx_one(struct vnet_port *port,
                            struct vio_dring_state *dr,
                            u32 index, int *needs_ack)
{
        struct vio_net_desc *desc = get_rx_desc(port, dr, index);
        struct vio_driver_state *vio = &port->vio;
        int err;

        BUG_ON(!desc);
        if (IS_ERR(desc))
                return PTR_ERR(desc);

        if (desc->hdr.state != VIO_DESC_READY)
                return 1;

        dma_rmb();

        viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n",
               desc->hdr.state, desc->hdr.ack,
               desc->size, desc->ncookies,
               desc->cookies[0].cookie_addr,
               desc->cookies[0].cookie_size);

        err = vnet_rx_one(port, desc);
        if (err == -ECONNRESET)
                return err;
        trace_vnet_rx_one(port->vio._local_sid, port->vio._peer_sid,
                          index, desc->hdr.ack);
        desc->hdr.state = VIO_DESC_DONE;
        err = put_rx_desc(port, dr, desc, index);
        if (err < 0)
                return err;
        *needs_ack = desc->hdr.ack;
        return 0;
}

static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
                        u32 start, u32 end, int *npkts, int budget)
{
        struct vio_driver_state *vio = &port->vio;
        int ack_start = -1, ack_end = -1;
        bool send_ack = true;

        end = (end == (u32)-1) ? vio_dring_prev(dr, start)
                               : vio_dring_next(dr, end);

        viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end);

        while (start != end) {
                int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack);

                if (err == -ECONNRESET)
                        return err;
                if (err != 0)
                        break;
                (*npkts)++;
                if (ack_start == -1)
                        ack_start = start;
                ack_end = start;
                start = vio_dring_next(dr, start);
                if (ack && start != end) {
                        err = vnet_send_ack(port, dr, ack_start, ack_end,
                                            VIO_DRING_ACTIVE);
                        if (err == -ECONNRESET)
                                return err;
                        ack_start = -1;
                }
                if ((*npkts) >= budget) {
                        send_ack = false;
                        break;
                }
        }
        if (unlikely(ack_start == -1)) {
                ack_end = vio_dring_prev(dr, start);
                ack_start = ack_end;
        }
        if (send_ack) {
                port->napi_resume = false;
                trace_vnet_tx_send_stopped_ack(port->vio._local_sid,
                                               port->vio._peer_sid,
                                               ack_end, *npkts);
                return vnet_send_ack(port, dr, ack_start, ack_end,
                                     VIO_DRING_STOPPED);
        } else {
                trace_vnet_tx_defer_stopped_ack(port->vio._local_sid,
                                                port->vio._peer_sid,
                                                ack_end, *npkts);
                port->napi_resume = true;
                port->napi_stop_idx = ack_end;
                return 1;
        }
}

static int vnet_rx(struct vnet_port *port, void *msgbuf, int *npkts,
                   int budget)
{
        struct vio_dring_data *pkt = msgbuf;
        struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING];
        struct vio_driver_state *vio = &port->vio;

        viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n",
               pkt->tag.stype_env, pkt->seq, dr->rcv_nxt);

        if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
                return 0;
        if (unlikely(pkt->seq != dr->rcv_nxt)) {
                pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n",
                       pkt->seq, dr->rcv_nxt);
                return 0;
        }

        if (!port->napi_resume)
                dr->rcv_nxt++;

        /* XXX Validate pkt->start_idx and pkt->end_idx XXX */

        return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx,
                            npkts, budget);
}

static int idx_is_pending(struct vio_dring_state *dr, u32 end)
{
        u32 idx = dr->cons;
        int found = 0;

        while (idx != dr->prod) {
                if (idx == end) {
                        found = 1;
                        break;
                }
                idx = vio_dring_next(dr, idx);
        }
        return found;
}
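
/* Note: idx_is_pending() is a linear walk of the [cons, prod) window,
 * O(VNET_TX_RING_SIZE) in the worst case, which is cheap next to the
 * LDC round trips it arbitrates.
 */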

static int vnet_ack(struct vnet_port *port, void *msgbuf)
{
        struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
        struct vio_dring_data *pkt = msgbuf;
        struct net_device *dev;
        u32 end;
        struct vio_net_desc *desc;
        struct netdev_queue *txq;

        if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
                return 0;

        end = pkt->end_idx;
        dev = VNET_PORT_TO_NET_DEVICE(port);
        netif_tx_lock(dev);
        if (unlikely(!idx_is_pending(dr, end))) {
                netif_tx_unlock(dev);
                return 0;
        }

        /* sync for race conditions with vnet_start_xmit() and tell xmit it
         * is time to send a trigger.
         */
        trace_vnet_rx_stopped_ack(port->vio._local_sid,
                                  port->vio._peer_sid, end);
        dr->cons = vio_dring_next(dr, end);
        desc = vio_dring_entry(dr, dr->cons);
        if (desc->hdr.state == VIO_DESC_READY && !port->start_cons) {
                /* vnet_start_xmit() just populated this dring but missed
                 * sending the "start" LDC message to the consumer.
                 * Send a "start" trigger on its behalf.
                 */
                if (__vnet_tx_trigger(port, dr->cons) > 0)
                        port->start_cons = false;
                else
                        port->start_cons = true;
        } else {
                port->start_cons = true;
        }
        netif_tx_unlock(dev);

        txq = netdev_get_tx_queue(dev, port->q_index);
        if (unlikely(netif_tx_queue_stopped(txq) &&
                     vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr)))
                return 1;

        return 0;
}
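
/* vnet_ack() returns 1 when enough descriptors have been consumed that
 * a flow-controlled TX queue may be woken (see maybe_tx_wakeup()); the
 * dr->cons update above is the bookkeeping consulted by the "start"
 * trigger logic in sunvnet_start_xmit_common().
 */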

static int vnet_nack(struct vnet_port *port, void *msgbuf)
{
        /* XXX just reset or similar XXX */
        return 0;
}

static int handle_mcast(struct vnet_port *port, void *msgbuf)
{
        struct vio_net_mcast_info *pkt = msgbuf;
        struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);

        if (pkt->tag.stype != VIO_SUBTYPE_ACK)
                pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n",
                       dev->name,
                       pkt->tag.type,
                       pkt->tag.stype,
                       pkt->tag.stype_env,
                       pkt->tag.sid);

        return 0;
}

/* If the queue is stopped, wake it up so that we'll
 * send out another START message at the next TX.
 */
static void maybe_tx_wakeup(struct vnet_port *port)
{
        struct netdev_queue *txq;

        txq = netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port),
                                  port->q_index);
        __netif_tx_lock(txq, smp_processor_id());
        if (likely(netif_tx_queue_stopped(txq)))
                netif_tx_wake_queue(txq);
        __netif_tx_unlock(txq);
}

bool sunvnet_port_is_up_common(struct vnet_port *vnet)
{
        struct vio_driver_state *vio = &vnet->vio;

        return !!(vio->hs_state & VIO_HS_COMPLETE);
}
EXPORT_SYMBOL_GPL(sunvnet_port_is_up_common);

static int vnet_event_napi(struct vnet_port *port, int budget)
{
        struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
        struct vio_driver_state *vio = &port->vio;
        int tx_wakeup, err;
        int npkts = 0;

        /* we don't expect any other bits */
        BUG_ON(port->rx_event & ~(LDC_EVENT_DATA_READY |
                                  LDC_EVENT_RESET |
                                  LDC_EVENT_UP));

        /* RESET takes precedence over any other event */
        if (port->rx_event & LDC_EVENT_RESET) {
                /* a link went down */

                if (port->vsw == 1) {
                        netif_tx_stop_all_queues(dev);
                        netif_carrier_off(dev);
                }

                vio_link_state_change(vio, LDC_EVENT_RESET);
                vnet_port_reset(port);
                vio_port_up(vio);

                /* If the device is running but its tx queue was
                 * stopped (due to flow control), restart it.
                 * This is necessary since vnet_port_reset()
                 * clears the tx drings and thus we may never get
                 * back a VIO_TYPE_DATA ACK packet - which is
                 * the normal mechanism to restart the tx queue.
                 */
                if (netif_running(dev))
                        maybe_tx_wakeup(port);

                port->rx_event = 0;
                port->stats.event_reset++;
                return 0;
        }

        if (port->rx_event & LDC_EVENT_UP) {
                /* a link came up */

                if (port->vsw == 1) {
                        netif_carrier_on(port->dev);
                        netif_tx_start_all_queues(port->dev);
                }

                vio_link_state_change(vio, LDC_EVENT_UP);
                port->rx_event = 0;
                port->stats.event_up++;
                return 0;
        }

        err = 0;
        tx_wakeup = 0;
        while (1) {
                union {
                        struct vio_msg_tag tag;
                        u64 raw[8];
                } msgbuf;

                if (port->napi_resume) {
                        struct vio_dring_data *pkt =
                                (struct vio_dring_data *)&msgbuf;
                        struct vio_dring_state *dr =
                                &port->vio.drings[VIO_DRIVER_RX_RING];

                        pkt->tag.type = VIO_TYPE_DATA;
                        pkt->tag.stype = VIO_SUBTYPE_INFO;
                        pkt->tag.stype_env = VIO_DRING_DATA;
                        pkt->seq = dr->rcv_nxt;
                        pkt->start_idx = vio_dring_next(dr,
                                                        port->napi_stop_idx);
                        pkt->end_idx = -1;
                } else {
                        err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
                        if (unlikely(err < 0)) {
                                if (err == -ECONNRESET)
                                        vio_conn_reset(vio);
                                break;
                        }
                        if (err == 0)
                                break;
                        viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
                               msgbuf.tag.type,
                               msgbuf.tag.stype,
                               msgbuf.tag.stype_env,
                               msgbuf.tag.sid);
                        err = vio_validate_sid(vio, &msgbuf.tag);
                        if (err < 0)
                                break;
                }

                if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
                        if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) {
                                if (!sunvnet_port_is_up_common(port)) {
                                        /* failures like handshake_failure()
                                         * may have cleaned up dring, but
                                         * NAPI polling may bring us here.
                                         */
                                        err = -ECONNRESET;
                                        break;
                                }
                                err = vnet_rx(port, &msgbuf, &npkts, budget);
                                if (npkts >= budget)
                                        break;
                                if (npkts == 0)
                                        break;
                        } else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) {
                                err = vnet_ack(port, &msgbuf);
                                if (err > 0)
                                        tx_wakeup |= err;
                        } else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) {
                                err = vnet_nack(port, &msgbuf);
                        }
                } else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
                        if (msgbuf.tag.stype_env == VNET_MCAST_INFO)
                                err = handle_mcast(port, &msgbuf);
                        else
                                err = vio_control_pkt_engine(vio, &msgbuf);
                        if (err)
                                break;
                } else {
                        err = vnet_handle_unknown(port, &msgbuf);
                }
                if (err == -ECONNRESET)
                        break;
        }
        if (unlikely(tx_wakeup && err != -ECONNRESET))
                maybe_tx_wakeup(port);
        return npkts;
}

int sunvnet_poll_common(struct napi_struct *napi, int budget)
{
        struct vnet_port *port = container_of(napi, struct vnet_port, napi);
        struct vio_driver_state *vio = &port->vio;
        int processed = vnet_event_napi(port, budget);

        if (processed < budget) {
                napi_complete_done(napi, processed);
                port->rx_event &= ~LDC_EVENT_DATA_READY;
                vio_set_intr(vio->vdev->rx_ino, HV_INTR_ENABLED);
        }
        return processed;
}
EXPORT_SYMBOL_GPL(sunvnet_poll_common);
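
/* Interrupt/NAPI flow: sunvnet_event_common() below masks the RX
 * interrupt and schedules NAPI; sunvnet_poll_common() above drains
 * events and re-enables the interrupt via vio_set_intr() only when a
 * poll completes under budget.
 */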

void sunvnet_event_common(void *arg, int event)
{
        struct vnet_port *port = arg;
        struct vio_driver_state *vio = &port->vio;

        port->rx_event |= event;
        vio_set_intr(vio->vdev->rx_ino, HV_INTR_DISABLED);
        napi_schedule(&port->napi);
}
EXPORT_SYMBOL_GPL(sunvnet_event_common);

static int __vnet_tx_trigger(struct vnet_port *port, u32 start)
{
        struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
        struct vio_dring_data hdr = {
                .tag = {
                        .type           = VIO_TYPE_DATA,
                        .stype          = VIO_SUBTYPE_INFO,
                        .stype_env      = VIO_DRING_DATA,
                        .sid            = vio_send_sid(&port->vio),
                },
                .dring_ident            = dr->ident,
                .start_idx              = start,
                .end_idx                = (u32)-1,
        };
        int err, delay;
        int retries = 0;

        if (port->stop_rx) {
                trace_vnet_tx_pending_stopped_ack(port->vio._local_sid,
                                                  port->vio._peer_sid,
                                                  port->stop_rx_idx, -1);
                err = vnet_send_ack(port,
                                    &port->vio.drings[VIO_DRIVER_RX_RING],
                                    port->stop_rx_idx, -1,
                                    VIO_DRING_STOPPED);
                if (err <= 0)
                        return err;
        }

        hdr.seq = dr->snd_nxt;
        delay = 1;
        do {
                err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
                if (err > 0) {
                        dr->snd_nxt++;
                        break;
                }
                udelay(delay);
                if ((delay <<= 1) > 128)
                        delay = 128;
                if (retries++ > VNET_MAX_RETRIES)
                        break;
        } while (err == -EAGAIN);
        trace_vnet_tx_trigger(port->vio._local_sid,
                              port->vio._peer_sid, start, err);

        return err;
}

static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port,
                                          unsigned *pending)
{
        struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
        struct sk_buff *skb = NULL;
        int i, txi;

        *pending = 0;

        txi = dr->prod;
        for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
                struct vio_net_desc *d;

                --txi;
                if (txi < 0)
                        txi = VNET_TX_RING_SIZE - 1;

                d = vio_dring_entry(dr, txi);

                if (d->hdr.state == VIO_DESC_READY) {
                        (*pending)++;
                        continue;
                }
                if (port->tx_bufs[txi].skb) {
                        if (d->hdr.state != VIO_DESC_DONE)
                                pr_notice("invalid ring buffer state %d\n",
                                          d->hdr.state);
                        BUG_ON(port->tx_bufs[txi].skb->next);

                        port->tx_bufs[txi].skb->next = skb;
                        skb = port->tx_bufs[txi].skb;
                        port->tx_bufs[txi].skb = NULL;

                        ldc_unmap(port->vio.lp,
                                  port->tx_bufs[txi].cookies,
                                  port->tx_bufs[txi].ncookies);
                } else if (d->hdr.state == VIO_DESC_FREE) {
                        break;
                }
                d->hdr.state = VIO_DESC_FREE;
        }
        return skb;
}

static inline void vnet_free_skbs(struct sk_buff *skb)
{
        struct sk_buff *next;

        while (skb) {
                next = skb->next;
                skb->next = NULL;
                dev_kfree_skb(skb);
                skb = next;
        }
}

void sunvnet_clean_timer_expire_common(unsigned long port0)
{
        struct vnet_port *port = (struct vnet_port *)port0;
        struct sk_buff *freeskbs;
        unsigned pending;

        netif_tx_lock(VNET_PORT_TO_NET_DEVICE(port));
        freeskbs = vnet_clean_tx_ring(port, &pending);
        netif_tx_unlock(VNET_PORT_TO_NET_DEVICE(port));

        vnet_free_skbs(freeskbs);

        if (pending)
                (void)mod_timer(&port->clean_timer,
                                jiffies + VNET_CLEAN_TIMEOUT);
        else
                del_timer(&port->clean_timer);
}
EXPORT_SYMBOL_GPL(sunvnet_clean_timer_expire_common);

static inline int vnet_skb_map(struct ldc_channel *lp, struct sk_buff *skb,
                               struct ldc_trans_cookie *cookies, int ncookies,
                               unsigned int map_perm)
{
        int i, nc, err, blen;

        /* header */
        blen = skb_headlen(skb);
        if (blen < ETH_ZLEN)
                blen = ETH_ZLEN;
        blen += VNET_PACKET_SKIP;
        blen += 8 - (blen & 7);

        err = ldc_map_single(lp, skb->data - VNET_PACKET_SKIP, blen, cookies,
                             ncookies, map_perm);
        if (err < 0)
                return err;
        nc = err;

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                skb_frag_t *f = &skb_shinfo(skb)->frags[i];
                u8 *vaddr;

                if (nc < ncookies) {
                        vaddr = kmap_atomic(skb_frag_page(f));
                        blen = skb_frag_size(f);
                        blen += 8 - (blen & 7);
                        err = ldc_map_single(lp, vaddr + f->page_offset,
                                             blen, cookies + nc, ncookies - nc,
                                             map_perm);
                        kunmap_atomic(vaddr);
                } else {
                        err = -EMSGSIZE;
                }

                if (err < 0) {
                        ldc_unmap(lp, cookies, nc);
                        return err;
                }
                nc += err;
        }
        return nc;
}
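
/* Mapping layout: cookie 0 covers the padded linear area beginning
 * VNET_PACKET_SKIP bytes before skb->data; each page fragment is then
 * mapped with its length padded to an 8-byte multiple, consuming
 * further cookies until ncookies is exhausted (-EMSGSIZE).
 */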

static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies)
{
        struct sk_buff *nskb;
        int i, len, pad, docopy;

        len = skb->len;
        pad = 0;
        if (len < ETH_ZLEN) {
                pad += ETH_ZLEN - skb->len;
                len += pad;
        }
        len += VNET_PACKET_SKIP;
        pad += 8 - (len & 7);

        /* make sure we have enough cookies and alignment in every frag */
        docopy = skb_shinfo(skb)->nr_frags >= ncookies;
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                skb_frag_t *f = &skb_shinfo(skb)->frags[i];

                docopy |= f->page_offset & 7;
        }
        if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP ||
            skb_tailroom(skb) < pad ||
            skb_headroom(skb) < VNET_PACKET_SKIP || docopy) {
                int start = 0, offset;
                __wsum csum;

                len = skb->len > ETH_ZLEN ? skb->len : ETH_ZLEN;
                nskb = alloc_and_align_skb(skb->dev, len);
                if (!nskb) {
                        dev_kfree_skb(skb);
                        return NULL;
                }
                skb_reserve(nskb, VNET_PACKET_SKIP);

                nskb->protocol = skb->protocol;
                offset = skb_mac_header(skb) - skb->data;
                skb_set_mac_header(nskb, offset);
                offset = skb_network_header(skb) - skb->data;
                skb_set_network_header(nskb, offset);
                offset = skb_transport_header(skb) - skb->data;
                skb_set_transport_header(nskb, offset);

                offset = 0;
                nskb->csum_offset = skb->csum_offset;
                nskb->ip_summed = skb->ip_summed;

                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        start = skb_checksum_start_offset(skb);
                if (start) {
                        int offset = start + nskb->csum_offset;

                        /* copy the headers, no csum here */
                        if (skb_copy_bits(skb, 0, nskb->data, start)) {
                                dev_kfree_skb(nskb);
                                dev_kfree_skb(skb);
                                return NULL;
                        }

                        /* copy the rest, with csum calculation */
                        *(__sum16 *)(skb->data + offset) = 0;
                        csum = skb_copy_and_csum_bits(skb, start,
                                                      nskb->data + start,
                                                      skb->len - start, 0);

                        /* add in the header checksums */
                        if (skb->protocol == htons(ETH_P_IP)) {
                                struct iphdr *iph = ip_hdr(nskb);

                                if (iph->protocol == IPPROTO_TCP ||
                                    iph->protocol == IPPROTO_UDP) {
                                        csum = csum_tcpudp_magic(iph->saddr,
                                                                 iph->daddr,
                                                                 skb->len - start,
                                                                 iph->protocol,
                                                                 csum);
                                }
                        } else if (skb->protocol == htons(ETH_P_IPV6)) {
                                struct ipv6hdr *ip6h = ipv6_hdr(nskb);

                                if (ip6h->nexthdr == IPPROTO_TCP ||
                                    ip6h->nexthdr == IPPROTO_UDP) {
                                        csum = csum_ipv6_magic(&ip6h->saddr,
                                                               &ip6h->daddr,
                                                               skb->len - start,
                                                               ip6h->nexthdr,
                                                               csum);
                                }
                        }

                        /* save the final result */
                        *(__sum16 *)(nskb->data + offset) = csum;

                        nskb->ip_summed = CHECKSUM_NONE;
                } else if (skb_copy_bits(skb, 0, nskb->data, skb->len)) {
                        dev_kfree_skb(nskb);
                        dev_kfree_skb(skb);
                        return NULL;
                }
                (void)skb_put(nskb, skb->len);
                if (skb_is_gso(skb)) {
                        skb_shinfo(nskb)->gso_size = skb_shinfo(skb)->gso_size;
                        skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
                }
                nskb->queue_mapping = skb->queue_mapping;
                dev_kfree_skb(skb);
                skb = nskb;
        }
        return skb;
}
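
/* Example of when the copy path above triggers (assuming
 * VNET_PACKET_SKIP == 6): an skb whose data pointer is 8-byte aligned
 * fails the ((unsigned long)skb->data & 7) != VNET_PACKET_SKIP test,
 * since LDC wants the frame to start 6 bytes into an aligned buffer so
 * that the IP header lands on a 4-byte boundary after the 14-byte
 * Ethernet header.
 */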

static int vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb,
                                struct vnet_port *(*vnet_tx_port)
                                (struct sk_buff *, struct net_device *))
{
        struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
        struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
        struct sk_buff *segs;
        int maclen, datalen;
        int status;
        int gso_size, gso_type, gso_segs;
        int hlen = skb_transport_header(skb) - skb_mac_header(skb);
        int proto = IPPROTO_IP;

        if (skb->protocol == htons(ETH_P_IP))
                proto = ip_hdr(skb)->protocol;
        else if (skb->protocol == htons(ETH_P_IPV6))
                proto = ipv6_hdr(skb)->nexthdr;

        if (proto == IPPROTO_TCP) {
                hlen += tcp_hdr(skb)->doff * 4;
        } else if (proto == IPPROTO_UDP) {
                hlen += sizeof(struct udphdr);
        } else {
                pr_err("vnet_handle_offloads GSO with unknown transport "
                       "protocol %d tproto %d\n", skb->protocol, proto);
                hlen = 128; /* XXX */
        }
        datalen = port->tsolen - hlen;

        gso_size = skb_shinfo(skb)->gso_size;
        gso_type = skb_shinfo(skb)->gso_type;
        gso_segs = skb_shinfo(skb)->gso_segs;

        if (port->tso && gso_size < datalen)
                gso_segs = DIV_ROUND_UP(skb->len - hlen, datalen);

        if (unlikely(vnet_tx_dring_avail(dr) < gso_segs)) {
                struct netdev_queue *txq;

                txq = netdev_get_tx_queue(dev, port->q_index);
                netif_tx_stop_queue(txq);
                if (vnet_tx_dring_avail(dr) < skb_shinfo(skb)->gso_segs)
                        return NETDEV_TX_BUSY;
                netif_tx_wake_queue(txq);
        }

        maclen = skb_network_header(skb) - skb_mac_header(skb);
        skb_pull(skb, maclen);

        if (port->tso && gso_size < datalen) {
                if (skb_unclone(skb, GFP_ATOMIC))
                        goto out_dropped;

                /* segment to TSO size */
                skb_shinfo(skb)->gso_size = datalen;
                skb_shinfo(skb)->gso_segs = gso_segs;
        }
        segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO);
        if (IS_ERR(segs))
                goto out_dropped;

        skb_push(skb, maclen);
        skb_reset_mac_header(skb);

        status = 0;
        while (segs) {
                struct sk_buff *curr = segs;

                segs = segs->next;
                curr->next = NULL;
                if (port->tso && curr->len > dev->mtu) {
                        skb_shinfo(curr)->gso_size = gso_size;
                        skb_shinfo(curr)->gso_type = gso_type;
                        skb_shinfo(curr)->gso_segs =
                                DIV_ROUND_UP(curr->len - hlen, gso_size);
                } else {
                        skb_shinfo(curr)->gso_size = 0;
                }

                skb_push(curr, maclen);
                skb_reset_mac_header(curr);
                memcpy(skb_mac_header(curr), skb_mac_header(skb),
                       maclen);
                curr->csum_start = skb_transport_header(curr) - curr->head;
                if (ip_hdr(curr)->protocol == IPPROTO_TCP)
                        curr->csum_offset = offsetof(struct tcphdr, check);
                else if (ip_hdr(curr)->protocol == IPPROTO_UDP)
                        curr->csum_offset = offsetof(struct udphdr, check);

                if (!(status & NETDEV_TX_MASK))
                        status = sunvnet_start_xmit_common(curr, dev,
                                                           vnet_tx_port);
                if (status & NETDEV_TX_MASK)
                        dev_kfree_skb_any(curr);
        }

        if (!(status & NETDEV_TX_MASK))
                dev_kfree_skb_any(skb);
        return status;
out_dropped:
        dev->stats.tx_dropped++;
        dev_kfree_skb_any(skb);
        return NETDEV_TX_OK;
}
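
/* TSO sizing example (illustrative): for a TCP segment with
 * hlen = ETH_HLEN + 20 + 20 = 54 bytes of headers,
 * datalen = port->tsolen - 54 and gso_segs becomes
 * DIV_ROUND_UP(skb->len - hlen, datalen), so every segment emitted by
 * skb_gso_segment() fits the peer's advertised LSO limit.
 */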
1322
1323int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev,
1324                              struct vnet_port *(*vnet_tx_port)
1325                              (struct sk_buff *, struct net_device *))
1326{
1327        struct vnet_port *port = NULL;
1328        struct vio_dring_state *dr;
1329        struct vio_net_desc *d;
1330        unsigned int len;
1331        struct sk_buff *freeskbs = NULL;
1332        int i, err, txi;
1333        unsigned pending = 0;
1334        struct netdev_queue *txq;
1335
1336        rcu_read_lock();
1337        port = vnet_tx_port(skb, dev);
1338        if (unlikely(!port))
1339                goto out_dropped;
1340
1341        if (skb_is_gso(skb) && skb->len > port->tsolen) {
1342                err = vnet_handle_offloads(port, skb, vnet_tx_port);
1343                rcu_read_unlock();
1344                return err;
1345        }
1346
1347        if (!skb_is_gso(skb) && skb->len > port->rmtu) {
1348                unsigned long localmtu = port->rmtu - ETH_HLEN;
1349
1350                if (vio_version_after_eq(&port->vio, 1, 3))
1351                        localmtu -= VLAN_HLEN;
1352
1353                if (skb->protocol == htons(ETH_P_IP)) {
1354                        struct flowi4 fl4;
1355                        struct rtable *rt = NULL;
1356
1357                        memset(&fl4, 0, sizeof(fl4));
1358                        fl4.flowi4_oif = dev->ifindex;
1359                        fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
1360                        fl4.daddr = ip_hdr(skb)->daddr;
1361                        fl4.saddr = ip_hdr(skb)->saddr;
1362
1363                        rt = ip_route_output_key(dev_net(dev), &fl4);
1364                        if (!IS_ERR(rt)) {
1365                                skb_dst_set(skb, &rt->dst);
1366                                icmp_send(skb, ICMP_DEST_UNREACH,
1367                                          ICMP_FRAG_NEEDED,
1368                                          htonl(localmtu));
1369                        }
1370                }
1371#if IS_ENABLED(CONFIG_IPV6)
1372                else if (skb->protocol == htons(ETH_P_IPV6))
1373                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu);
1374#endif
1375                goto out_dropped;
1376        }
1377
1378        skb = vnet_skb_shape(skb, 2);
1379
1380        if (unlikely(!skb))
1381                goto out_dropped;
1382
1383        if (skb->ip_summed == CHECKSUM_PARTIAL) {
1384                if (skb->protocol == htons(ETH_P_IP))
1385                        vnet_fullcsum_ipv4(skb);
1386#if IS_ENABLED(CONFIG_IPV6)
1387                else if (skb->protocol == htons(ETH_P_IPV6))
1388                        vnet_fullcsum_ipv6(skb);
1389#endif
1390        }
1391
1392        dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1393        i = skb_get_queue_mapping(skb);
1394        txq = netdev_get_tx_queue(dev, i);
1395        if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
1396                if (!netif_tx_queue_stopped(txq)) {
1397                        netif_tx_stop_queue(txq);
1398
1399                        /* This is a hard error, log it. */
1400                        netdev_err(dev, "BUG! Tx Ring full when queue awake!\n");
1401                        dev->stats.tx_errors++;
1402                }
1403                rcu_read_unlock();
1404                return NETDEV_TX_BUSY;
1405        }
1406
1407        d = vio_dring_cur(dr);
1408
1409        txi = dr->prod;
1410
1411        freeskbs = vnet_clean_tx_ring(port, &pending);
1412
1413        BUG_ON(port->tx_bufs[txi].skb);
1414
1415        len = skb->len;
1416        if (len < ETH_ZLEN)
1417                len = ETH_ZLEN;
1418
1419        err = vnet_skb_map(port->vio.lp, skb, port->tx_bufs[txi].cookies, 2,
1420                           (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW));
1421        if (err < 0) {
1422                netdev_info(dev, "tx buffer map error %d\n", err);
1423                goto out_dropped;
1424        }
1425
1426        port->tx_bufs[txi].skb = skb;
1427        skb = NULL;
1428        port->tx_bufs[txi].ncookies = err;
1429
1430        /* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
1431         * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
1432         * the protocol itself does not require it as long as the peer
1433         * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
1434         *
1435         * An ACK for every packet in the ring is expensive as the
1436         * sending of LDC messages is slow and affects performance.
1437         */
1438        d->hdr.ack = VIO_ACK_DISABLE;
1439        d->size = len;
1440        d->ncookies = port->tx_bufs[txi].ncookies;
1441        for (i = 0; i < d->ncookies; i++)
1442                d->cookies[i] = port->tx_bufs[txi].cookies[i];
1443        if (vio_version_after_eq(&port->vio, 1, 7)) {
1444                struct vio_net_dext *dext = vio_net_ext(d);
1445
1446                memset(dext, 0, sizeof(*dext));
1447                if (skb_is_gso(port->tx_bufs[txi].skb)) {
1448                        dext->ipv4_lso_mss = skb_shinfo(port->tx_bufs[txi].skb)
1449                                             ->gso_size;
1450                        dext->flags |= VNET_PKT_IPV4_LSO;
1451                }
1452                if (vio_version_after_eq(&port->vio, 1, 8) &&
1453                    !port->switch_port) {
1454                        dext->flags |= VNET_PKT_HCK_IPV4_HDRCKSUM_OK;
1455                        dext->flags |= VNET_PKT_HCK_FULLCKSUM_OK;
1456                }
1457        }
1458
1459        /* This must order stores even on !SMP kernels (hence dma_wmb(),
1460         * not smp_wmb()), because the memory is shared with the peer LDOM.
1461         */
1462        dma_wmb();
1463
1464        d->hdr.state = VIO_DESC_READY;
1465
1466        /* Exactly one ldc "start" trigger (for dr->cons) needs to be sent
1467         * to notify the consumer that some descriptors are READY.
1468         * After that "start" trigger, no additional triggers are needed until
1469         * a DRING_STOPPED is received from the consumer. The dr->cons field
1470         * (set up by vnet_ack()) has the value of the next dring index
1471         * that has not yet been ack-ed. We send a "start" trigger here
1472         * if, and only if, start_cons is true (reset it afterward). Conversely,
1473         * vnet_ack() should check if the dring corresponding to cons
1474         * is marked READY, but start_cons was false.
1475         * If so, vnet_ack() should send out the missed "start" trigger.
1476         *
1477         * Note that the dma_wmb() above makes sure the cookies et al.
1478         * are not globally visible before VIO_DESC_READY is, and that
1479         * the stores are ordered correctly by the compiler. The
1480         * consumer will not proceed until VIO_DESC_READY is visible,
1481         * ensuring that it never observes descriptor state out of
1482         * order. The HV trap raised by the LDC start trigger is the
1483         * producer-to-consumer announcement that work is available to
1484         * the consumer.
1485         */
1486        if (!port->start_cons) { /* previous trigger suffices */
1487                trace_vnet_skip_tx_trigger(port->vio._local_sid,
1488                                           port->vio._peer_sid, dr->cons);
1489                goto ldc_start_done;
1490        }
1491
1492        err = __vnet_tx_trigger(port, dr->cons);
1493        if (unlikely(err < 0)) {
1494                netdev_info(dev, "TX trigger error %d\n", err);
1495                d->hdr.state = VIO_DESC_FREE;
1496                skb = port->tx_bufs[txi].skb;
1497                port->tx_bufs[txi].skb = NULL;
1498                dev->stats.tx_carrier_errors++;
1499                goto out_dropped;
1500        }
1501
1502ldc_start_done:
1503        port->start_cons = false;
1504
1505        dev->stats.tx_packets++;
1506        dev->stats.tx_bytes += port->tx_bufs[txi].skb->len;
1507        port->stats.tx_packets++;
1508        port->stats.tx_bytes += port->tx_bufs[txi].skb->len;
1509
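            /* Advance the producer index; VNET_TX_RING_SIZE is a power of
             * two, so the mask wraps it. If the ring just filled, stop the
             * queue, then re-check: vnet_ack() may have freed slots in the
             * meantime, in which case the queue is woken right back up.
             */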
1510        dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
1511        if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
1512                netif_tx_stop_queue(txq);
1513                smp_rmb();
1514                if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
1515                        netif_tx_wake_queue(txq);
1516        }
1517
1518        (void)mod_timer(&port->clean_timer, jiffies + VNET_CLEAN_TIMEOUT);
1519        rcu_read_unlock();
1520
1521        vnet_free_skbs(freeskbs);
1522
1523        return NETDEV_TX_OK;
1524
1525out_dropped:
1526        if (pending)
1527                (void)mod_timer(&port->clean_timer,
1528                                jiffies + VNET_CLEAN_TIMEOUT);
1529        else if (port)
1530                del_timer(&port->clean_timer);
1531        rcu_read_unlock();
1532        if (skb)
1533                dev_kfree_skb(skb);
1534        vnet_free_skbs(freeskbs);
1535        dev->stats.tx_dropped++;
1536        return NETDEV_TX_OK;
1537}
1538EXPORT_SYMBOL_GPL(sunvnet_start_xmit_common);
1539
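    /* ndo_tx_timeout hook; recovery logic has not been implemented yet. */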
1540void sunvnet_tx_timeout_common(struct net_device *dev)
1541{
1542        /* XXX Implement me XXX */
1543}
1544EXPORT_SYMBOL_GPL(sunvnet_tx_timeout_common);
1545
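    /* ndo_open: bring the carrier up and enable all transmit queues. */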
1546int sunvnet_open_common(struct net_device *dev)
1547{
1548        netif_carrier_on(dev);
1549        netif_tx_start_all_queues(dev);
1550
1551        return 0;
1552}
1553EXPORT_SYMBOL_GPL(sunvnet_open_common);
1554
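    /* ndo_stop: quiesce the transmit queues and drop the carrier. */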
1555int sunvnet_close_common(struct net_device *dev)
1556{
1557        netif_tx_stop_all_queues(dev);
1558        netif_carrier_off(dev);
1559
1560        return 0;
1561}
1562EXPORT_SYMBOL_GPL(sunvnet_close_common);
1563
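    /* Linear lookup of @addr in the driver-private multicast list. */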
1564static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr)
1565{
1566        struct vnet_mcast_entry *m;
1567
1568        for (m = vp->mcast_list; m; m = m->next) {
1569                if (ether_addr_equal(m->addr, addr))
1570                        return m;
1571        }
1572        return NULL;
1573}
1574
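    /* Sync the private list with the device's multicast list: mark entries
     * that are still present (hit = 1) and add ones not seen before;
     * entries left with hit == 0 are pruned later by __send_mc_list().
     */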
1575static void __update_mc_list(struct vnet *vp, struct net_device *dev)
1576{
1577        struct netdev_hw_addr *ha;
1578
1579        netdev_for_each_mc_addr(ha, dev) {
1580                struct vnet_mcast_entry *m;
1581
1582                m = __vnet_mc_find(vp, ha->addr);
1583                if (m) {
1584                        m->hit = 1;
1585                        continue;
1586                }
1587
1588                /* Not seen before: allocate a new entry and link it in. */
1589                m = kzalloc(sizeof(*m), GFP_ATOMIC);
1590                if (!m)
1591                        continue;
1592                memcpy(m->addr, ha->addr, ETH_ALEN);
1593                m->hit = 1;
1594
1595                m->next = vp->mcast_list;
1596                vp->mcast_list = m;
1598        }
1599}
1600
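    /* Push multicast state to the switch port in two passes: first announce
     * any not-yet-sent addresses (set = 1), then withdraw and free the
     * stale entries that __update_mc_list() left with hit == 0 (set = 0).
     * Both passes batch up to VNET_NUM_MCAST addresses per VIO message.
     */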
1601static void __send_mc_list(struct vnet *vp, struct vnet_port *port)
1602{
1603        struct vio_net_mcast_info info;
1604        struct vnet_mcast_entry *m, **pp;
1605        int n_addrs;
1606
1607        memset(&info, 0, sizeof(info));
1608
1609        info.tag.type = VIO_TYPE_CTRL;
1610        info.tag.stype = VIO_SUBTYPE_INFO;
1611        info.tag.stype_env = VNET_MCAST_INFO;
1612        info.tag.sid = vio_send_sid(&port->vio);
1613        info.set = 1;
1614
1615        n_addrs = 0;
1616        for (m = vp->mcast_list; m; m = m->next) {
1617                if (m->sent)
1618                        continue;
1619                m->sent = 1;
1620                memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
1621                       m->addr, ETH_ALEN);
1622                if (++n_addrs == VNET_NUM_MCAST) {
1623                        info.count = n_addrs;
1624
1625                        (void)vio_ldc_send(&port->vio, &info,
1626                                           sizeof(info));
1627                        n_addrs = 0;
1628                }
1629        }
1630        if (n_addrs) {
1631                info.count = n_addrs;
1632                (void)vio_ldc_send(&port->vio, &info, sizeof(info));
1633        }
1634
1635        info.set = 0;
1636
1637        n_addrs = 0;
1638        pp = &vp->mcast_list;
1639        while ((m = *pp) != NULL) {
1640                if (m->hit) {
1641                        m->hit = 0;
1642                        pp = &m->next;
1643                        continue;
1644                }
1645
1646                memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
1647                       m->addr, ETH_ALEN);
1648                if (++n_addrs == VNET_NUM_MCAST) {
1649                        info.count = n_addrs;
1650                        (void)vio_ldc_send(&port->vio, &info,
1651                                           sizeof(info));
1652                        n_addrs = 0;
1653                }
1654
1655                *pp = m->next;
1656                kfree(m);
1657        }
1658        if (n_addrs) {
1659                info.count = n_addrs;
1660                (void)vio_ldc_send(&port->vio, &info, sizeof(info));
1661        }
1662}
1663
1664void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp)
1665{
1666        struct vnet_port *port;
1667
1668        rcu_read_lock();
1669        list_for_each_entry_rcu(port, &vp->port_list, list) {
1670                if (port->switch_port) {
1671                        __update_mc_list(vp, dev);
1672                        __send_mc_list(vp, port);
1673                        break;
1674                }
1675        }
1676        rcu_read_unlock();
1677}
1678EXPORT_SYMBOL_GPL(sunvnet_set_rx_mode_common);
1679
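    /* The port MAC address is assigned through the machine description,
     * so changing it from within the guest is not supported.
     */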
1680int sunvnet_set_mac_addr_common(struct net_device *dev, void *p)
1681{
1682        return -EINVAL;
1683}
1684EXPORT_SYMBOL_GPL(sunvnet_set_mac_addr_common);
1685
1686void sunvnet_port_free_tx_bufs_common(struct vnet_port *port)
1687{
1688        struct vio_dring_state *dr;
1689        int i;
1690
1691        dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1692
1693        if (!dr->base)
1694                return;
1695
1696        for (i = 0; i < VNET_TX_RING_SIZE; i++) {
1697                struct vio_net_desc *d;
1698                struct sk_buff *skb = port->tx_bufs[i].skb;
1699
1700                if (!skb)
1701                        continue;
1702
1703                d = vio_dring_entry(dr, i);
1704
1705                ldc_unmap(port->vio.lp,
1706                          port->tx_bufs[i].cookies,
1707                          port->tx_bufs[i].ncookies);
1708                dev_kfree_skb(skb);
1709                port->tx_bufs[i].skb = NULL;
1710                d->hdr.state = VIO_DESC_FREE;
1711        }
1712        ldc_free_exp_dring(port->vio.lp, dr->base,
1713                           (dr->entry_size * dr->num_entries),
1714                           dr->cookies, dr->ncookies);
1715        dr->base = NULL;
1716        dr->entry_size = 0;
1717        dr->num_entries = 0;
1718        dr->pending = 0;
1719        dr->ncookies = 0;
1720}
1721EXPORT_SYMBOL_GPL(sunvnet_port_free_tx_bufs_common);
1722
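    /* Drop per-port TX state after an LDC reset; the MTU and TSO limits
     * are renegotiated when the handshake runs again.
     */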
1723void vnet_port_reset(struct vnet_port *port)
1724{
1725        del_timer(&port->clean_timer);
1726        sunvnet_port_free_tx_bufs_common(port);
1727        port->rmtu = 0;
1728        port->tso = (port->vsw == 0);  /* no TSO on vsw ports; it misbehaves when bridged */
1729        port->tsolen = 0;
1730}
1731EXPORT_SYMBOL_GPL(vnet_port_reset);
1732
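    /* Allocate and export the TX descriptor ring. Each entry holds the
     * descriptor itself plus room for two LDC cookies and, for VIO 1.7
     * and later, the descriptor extension (struct vio_net_dext).
     */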
1733static int vnet_port_alloc_tx_ring(struct vnet_port *port)
1734{
1735        struct vio_dring_state *dr;
1736        unsigned long len, elen;
1737        int i, err, ncookies;
1738        void *dring;
1739
1740        dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1741
1742        elen = sizeof(struct vio_net_desc) +
1743               sizeof(struct ldc_trans_cookie) * 2;
1744        if (vio_version_after_eq(&port->vio, 1, 7))
1745                elen += sizeof(struct vio_net_dext);
1746        len = VNET_TX_RING_SIZE * elen;
1747
1748        ncookies = VIO_MAX_RING_COOKIES;
1749        dring = ldc_alloc_exp_dring(port->vio.lp, len,
1750                                    dr->cookies, &ncookies,
1751                                    (LDC_MAP_SHADOW |
1752                                     LDC_MAP_DIRECT |
1753                                     LDC_MAP_RW));
1754        if (IS_ERR(dring)) {
1755                err = PTR_ERR(dring);
1756                goto err_out;
1757        }
1758
1759        dr->base = dring;
1760        dr->entry_size = elen;
1761        dr->num_entries = VNET_TX_RING_SIZE;
1762        dr->prod = 0;
1763        dr->cons = 0;
1764        port->start_cons = true; /* need an initial trigger */
1765        dr->pending = VNET_TX_RING_SIZE;
1766        dr->ncookies = ncookies;
1767
1768        for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
1769                struct vio_net_desc *d;
1770
1771                d = vio_dring_entry(dr, i);
1772                d->hdr.state = VIO_DESC_FREE;
1773        }
1774        return 0;
1775
1776err_out:
1777        sunvnet_port_free_tx_bufs_common(port);
1778
1779        return err;
1780}
1781
1782#ifdef CONFIG_NET_POLL_CONTROLLER
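    /* netpoll entry point: schedule NAPI on the first port so receive
     * processing can make progress without interrupts.
     */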
1783void sunvnet_poll_controller_common(struct net_device *dev, struct vnet *vp)
1784{
1785        struct vnet_port *port;
1786        unsigned long flags;
1787
1788        spin_lock_irqsave(&vp->lock, flags);
1789        if (!list_empty(&vp->port_list)) {
1790                port = list_entry(vp->port_list.next, struct vnet_port, list);
1791                napi_schedule(&port->napi);
1792        }
1793        spin_unlock_irqrestore(&vp->lock, flags);
1794}
1795EXPORT_SYMBOL_GPL(sunvnet_poll_controller_common);
1796#endif
1797
1798void sunvnet_port_add_txq_common(struct vnet_port *port)
1799{
1800        struct vnet *vp = port->vp;
1801        int smallest = 0;
1802        int i;
1803
1804        /* Find the least-used queue, preferring the first unused one.
1805         * When there are more LDOMs than queues, ports begin to
1806         * double up on queues.
1807         */
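            /* Example: with q_used = { 2, 1, 0, ... } the scan stops at
             * the first unused queue and assigns index 2.
             */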
1808        for (i = 0; i < VNET_MAX_TXQS; i++) {
1809                if (vp->q_used[i] == 0) {
1810                        smallest = i;
1811                        break;
1812                }
1813                if (vp->q_used[i] < vp->q_used[smallest])
1814                        smallest = i;
1815        }
1816
1817        vp->nports++;
1818        vp->q_used[smallest]++;
1819        port->q_index = smallest;
1820}
1821EXPORT_SYMBOL_GPL(sunvnet_port_add_txq_common);
1822
1823void sunvnet_port_rm_txq_common(struct vnet_port *port)
1824{
1825        port->vp->nports--;
1826        port->vp->q_used[port->q_index]--;
1827        port->q_index = 0;
1828}
1829EXPORT_SYMBOL_GPL(sunvnet_port_rm_txq_common);
1830