linux/arch/um/drivers/vector_kern.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2017 - 2019 Cambridge Greys Limited
   4 * Copyright (C) 2011 - 2014 Cisco Systems Inc
   5 * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
   6 * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
   7 * James Leu (jleu@mindspring.net).
   8 * Copyright (C) 2001 by various other people who didn't put their name here.
   9 */
  10
  11#include <linux/version.h>
  12#include <linux/memblock.h>
  13#include <linux/etherdevice.h>
  14#include <linux/ethtool.h>
  15#include <linux/inetdevice.h>
  16#include <linux/init.h>
  17#include <linux/list.h>
  18#include <linux/netdevice.h>
  19#include <linux/platform_device.h>
  20#include <linux/rtnetlink.h>
  21#include <linux/skbuff.h>
  22#include <linux/slab.h>
  23#include <linux/interrupt.h>
  24#include <linux/firmware.h>
  25#include <linux/fs.h>
  26#include <uapi/linux/filter.h>
  27#include <init.h>
  28#include <irq_kern.h>
  29#include <irq_user.h>
  30#include <net_kern.h>
  31#include <os.h>
  32#include "mconsole_kern.h"
  33#include "vector_user.h"
  34#include "vector_kern.h"
  35
  36/*
  37 * Adapted from network devices with the following major changes:
  38 * All transports are static - simplifies the code significantly
  39 * Multiple FDs/IRQs per device
  40 * Vector IO optionally used for read/write, falling back to legacy
  41 * based on configuration and/or availability
  42 * Configuration is no longer positional - L2TPv3 and GRE require up to
  43 * 10 parameters; passing these positionally is not fit for purpose.
  44 * Only socket transports are supported
  45 */
  46
  47
  48#define DRIVER_NAME "uml-vector"
  49struct vector_cmd_line_arg {
  50        struct list_head list;
  51        int unit;
  52        char *arguments;
  53};
  54
  55struct vector_device {
  56        struct list_head list;
  57        struct net_device *dev;
  58        struct platform_device pdev;
  59        int unit;
  60        int opened;
  61};
  62
  63static LIST_HEAD(vec_cmd_line);
  64
  65static DEFINE_SPINLOCK(vector_devices_lock);
  66static LIST_HEAD(vector_devices);
  67
  68static int driver_registered;
  69
  70static void vector_eth_configure(int n, struct arglist *def);
  71
  72/* Argument accessors to set variables (and/or set default values)
  73 * mtu, buffer sizing, default headroom, etc
  74 */
  75
  76#define DEFAULT_HEADROOM 2
  77#define SAFETY_MARGIN 32
  78#define DEFAULT_VECTOR_SIZE 64
  79#define TX_SMALL_PACKET 128
  80#define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1)
  81#define MAX_ITERATIONS 64
  82
  83static const struct {
  84        const char string[ETH_GSTRING_LEN];
  85} ethtool_stats_keys[] = {
  86        { "rx_queue_max" },
  87        { "rx_queue_running_average" },
  88        { "tx_queue_max" },
  89        { "tx_queue_running_average" },
  90        { "rx_encaps_errors" },
  91        { "tx_timeout_count" },
  92        { "tx_restart_queue" },
  93        { "tx_kicks" },
  94        { "tx_flow_control_xon" },
  95        { "tx_flow_control_xoff" },
  96        { "rx_csum_offload_good" },
  97        { "rx_csum_offload_errors" },
  98        { "sg_ok" },
  99        { "sg_linearized" },
 100};
 101
 102#define VECTOR_NUM_STATS        ARRAY_SIZE(ethtool_stats_keys)
 103
 104static void vector_reset_stats(struct vector_private *vp)
 105{
 106        vp->estats.rx_queue_max = 0;
 107        vp->estats.rx_queue_running_average = 0;
 108        vp->estats.tx_queue_max = 0;
 109        vp->estats.tx_queue_running_average = 0;
 110        vp->estats.rx_encaps_errors = 0;
 111        vp->estats.tx_timeout_count = 0;
 112        vp->estats.tx_restart_queue = 0;
 113        vp->estats.tx_kicks = 0;
 114        vp->estats.tx_flow_control_xon = 0;
 115        vp->estats.tx_flow_control_xoff = 0;
 116        vp->estats.sg_ok = 0;
 117        vp->estats.sg_linearized = 0;
 118}
 119
 120static int get_mtu(struct arglist *def)
 121{
 122        char *mtu = uml_vector_fetch_arg(def, "mtu");
 123        long result;
 124
 125        if (mtu != NULL) {
 126                if (kstrtoul(mtu, 10, &result) == 0)
 127                        if ((result < (1 << 16) - 1) && (result >= 576))
 128                                return result;
 129        }
 130        return ETH_MAX_PACKET;
 131}
 132
 133static char *get_bpf_file(struct arglist *def)
 134{
 135        return uml_vector_fetch_arg(def, "bpffile");
 136}
 137
 138static bool get_bpf_flash(struct arglist *def)
 139{
 140        char *allow = uml_vector_fetch_arg(def, "bpfflash");
 141        long result;
 142
 143        if (allow != NULL) {
 144                if (kstrtoul(allow, 10, &result) == 0)
  145                        return (result > 0);
 146        }
 147        return false;
 148}
 149
 150static int get_depth(struct arglist *def)
 151{
 152        char *mtu = uml_vector_fetch_arg(def, "depth");
 153        long result;
 154
 155        if (mtu != NULL) {
 156                if (kstrtoul(mtu, 10, &result) == 0)
 157                        return result;
 158        }
 159        return DEFAULT_VECTOR_SIZE;
 160}
 161
 162static int get_headroom(struct arglist *def)
 163{
 164        char *mtu = uml_vector_fetch_arg(def, "headroom");
 165        long result;
 166
 167        if (mtu != NULL) {
 168                if (kstrtoul(mtu, 10, &result) == 0)
 169                        return result;
 170        }
 171        return DEFAULT_HEADROOM;
 172}
 173
 174static int get_req_size(struct arglist *def)
 175{
 176        char *gro = uml_vector_fetch_arg(def, "gro");
 177        long result;
 178
 179        if (gro != NULL) {
 180                if (kstrtoul(gro, 10, &result) == 0) {
 181                        if (result > 0)
 182                                return 65536;
 183                }
 184        }
 185        return get_mtu(def) + ETH_HEADER_OTHER +
 186                get_headroom(def) + SAFETY_MARGIN;
 187}
 188
 189
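     /* Translate the transport type and the "vec" argument into the
      * VECTOR_* option flags used by the rest of the driver.
      */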
 190static int get_transport_options(struct arglist *def)
 191{
 192        char *transport = uml_vector_fetch_arg(def, "transport");
 193        char *vector = uml_vector_fetch_arg(def, "vec");
 194
 195        int vec_rx = VECTOR_RX;
 196        int vec_tx = VECTOR_TX;
 197        long parsed;
 198        int result = 0;
 199
 200        if (transport == NULL)
 201                return -EINVAL;
 202
 203        if (vector != NULL) {
 204                if (kstrtoul(vector, 10, &parsed) == 0) {
 205                        if (parsed == 0) {
 206                                vec_rx = 0;
 207                                vec_tx = 0;
 208                        }
 209                }
 210        }
 211
 212        if (get_bpf_flash(def))
 213                result = VECTOR_BPF_FLASH;
 214
 215        if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
 216                return result;
 217        if (strncmp(transport, TRANS_HYBRID, TRANS_HYBRID_LEN) == 0)
 218                return (result | vec_rx | VECTOR_BPF);
 219        if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
 220                return (result | vec_rx | vec_tx | VECTOR_QDISC_BYPASS);
 221        return (result | vec_rx | vec_tx);
 222}
 223
 224
 225/* A mini-buffer for packet drop read
 226 * All of our supported transports are datagram oriented and we always
 227 * read using recvmsg or recvmmsg. If we pass a buffer which is smaller
  228 * than the packet size it still counts as a full packet read and will
 229 * clean the incoming stream to keep sigio/epoll happy
 230 */
 231
 232#define DROP_BUFFER_SIZE 32
 233
 234static char *drop_buffer;
 235
 236/* Array backed queues optimized for bulk enqueue/dequeue and
 237 * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios.
 238 * For more details and full design rationale see
 239 * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt
 240 */
 241
 242
 243/*
 244 * Advance the mmsg queue head by n = advance. Resets the queue to
 245 * maximum enqueue/dequeue-at-once capacity if possible. Called by
 246 * dequeuers. Caller must hold the head_lock!
 247 */
 248
 249static int vector_advancehead(struct vector_queue *qi, int advance)
 250{
 251        int queue_depth;
 252
 253        qi->head =
 254                (qi->head + advance)
 255                        % qi->max_depth;
 256
 257
 258        spin_lock(&qi->tail_lock);
 259        qi->queue_depth -= advance;
 260
 261        /* we are at 0, use this to
 262         * reset head and tail so we can use max size vectors
 263         */
 264
 265        if (qi->queue_depth == 0) {
 266                qi->head = 0;
 267                qi->tail = 0;
 268        }
 269        queue_depth = qi->queue_depth;
 270        spin_unlock(&qi->tail_lock);
 271        return queue_depth;
 272}
 273
 274/*      Advance the queue tail by n = advance.
 275 *      This is called by enqueuers which should hold the
 276 *      head lock already
 277 */
 278
 279static int vector_advancetail(struct vector_queue *qi, int advance)
 280{
 281        int queue_depth;
 282
 283        qi->tail =
 284                (qi->tail + advance)
 285                        % qi->max_depth;
 286        spin_lock(&qi->head_lock);
 287        qi->queue_depth += advance;
 288        queue_depth = qi->queue_depth;
 289        spin_unlock(&qi->head_lock);
 290        return queue_depth;
 291}
 292
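     /* Build the TX iovec for an skb: optional transport header first,
      * then the linear data, then any page fragments. Returns the number
      * of iovec entries used, or -1 if the skb could not be linearized.
      */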
 293static int prep_msg(struct vector_private *vp,
 294        struct sk_buff *skb,
 295        struct iovec *iov)
 296{
 297        int iov_index = 0;
 298        int nr_frags, frag;
 299        skb_frag_t *skb_frag;
 300
 301        nr_frags = skb_shinfo(skb)->nr_frags;
 302        if (nr_frags > MAX_IOV_SIZE) {
 303                if (skb_linearize(skb) != 0)
 304                        goto drop;
 305        }
 306        if (vp->header_size > 0) {
 307                iov[iov_index].iov_len = vp->header_size;
 308                vp->form_header(iov[iov_index].iov_base, skb, vp);
 309                iov_index++;
 310        }
 311        iov[iov_index].iov_base = skb->data;
 312        if (nr_frags > 0) {
 313                iov[iov_index].iov_len = skb->len - skb->data_len;
 314                vp->estats.sg_ok++;
 315        } else
 316                iov[iov_index].iov_len = skb->len;
 317        iov_index++;
 318        for (frag = 0; frag < nr_frags; frag++) {
 319                skb_frag = &skb_shinfo(skb)->frags[frag];
 320                iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
 321                iov[iov_index].iov_len = skb_frag_size(skb_frag);
 322                iov_index++;
 323        }
 324        return iov_index;
 325drop:
 326        return -1;
 327}
 328/*
 329 * Generic vector enqueue with support for forming headers using transport
 330 * specific callback. Allows GRE, L2TPv3, RAW and other transports
 331 * to use a common enqueue procedure in vector mode
 332 */
 333
 334static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
 335{
 336        struct vector_private *vp = netdev_priv(qi->dev);
 337        int queue_depth;
 338        int packet_len;
 339        struct mmsghdr *mmsg_vector = qi->mmsg_vector;
 340        int iov_count;
 341
 342        spin_lock(&qi->tail_lock);
 343        spin_lock(&qi->head_lock);
 344        queue_depth = qi->queue_depth;
 345        spin_unlock(&qi->head_lock);
 346
 347        if (skb)
 348                packet_len = skb->len;
 349
 350        if (queue_depth < qi->max_depth) {
 351
 352                *(qi->skbuff_vector + qi->tail) = skb;
 353                mmsg_vector += qi->tail;
 354                iov_count = prep_msg(
 355                        vp,
 356                        skb,
 357                        mmsg_vector->msg_hdr.msg_iov
 358                );
 359                if (iov_count < 1)
 360                        goto drop;
 361                mmsg_vector->msg_hdr.msg_iovlen = iov_count;
 362                mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr;
 363                mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size;
 364                queue_depth = vector_advancetail(qi, 1);
 365        } else
 366                goto drop;
 367        spin_unlock(&qi->tail_lock);
 368        return queue_depth;
 369drop:
 370        qi->dev->stats.tx_dropped++;
 371        if (skb != NULL) {
 372                packet_len = skb->len;
 373                dev_consume_skb_any(skb);
 374                netdev_completed_queue(qi->dev, 1, packet_len);
 375        }
 376        spin_unlock(&qi->tail_lock);
 377        return queue_depth;
 378}
 379
 380static int consume_vector_skbs(struct vector_queue *qi, int count)
 381{
 382        struct sk_buff *skb;
 383        int skb_index;
 384        int bytes_compl = 0;
 385
 386        for (skb_index = qi->head; skb_index < qi->head + count; skb_index++) {
 387                skb = *(qi->skbuff_vector + skb_index);
 388                /* mark as empty to ensure correct destruction if
 389                 * needed
 390                 */
 391                bytes_compl += skb->len;
 392                *(qi->skbuff_vector + skb_index) = NULL;
 393                dev_consume_skb_any(skb);
 394        }
 395        qi->dev->stats.tx_bytes += bytes_compl;
 396        qi->dev->stats.tx_packets += count;
 397        netdev_completed_queue(qi->dev, count, bytes_compl);
 398        return vector_advancehead(qi, count);
 399}
 400
 401/*
  402 * Generic vector dequeue via sendmmsg with support for forming headers
 403 * using transport specific callback. Allows GRE, L2TPv3, RAW and
 404 * other transports to use a common dequeue procedure in vector mode
 405 */
 406
 407
 408static int vector_send(struct vector_queue *qi)
 409{
 410        struct vector_private *vp = netdev_priv(qi->dev);
 411        struct mmsghdr *send_from;
 412        int result = 0, send_len, queue_depth = qi->max_depth;
 413
 414        if (spin_trylock(&qi->head_lock)) {
 415                if (spin_trylock(&qi->tail_lock)) {
 416                        /* update queue_depth to current value */
 417                        queue_depth = qi->queue_depth;
 418                        spin_unlock(&qi->tail_lock);
 419                        while (queue_depth > 0) {
 420                                /* Calculate the start of the vector */
 421                                send_len = queue_depth;
 422                                send_from = qi->mmsg_vector;
 423                                send_from += qi->head;
 424                                /* Adjust vector size if wraparound */
 425                                if (send_len + qi->head > qi->max_depth)
 426                                        send_len = qi->max_depth - qi->head;
 427                                /* Try to TX as many packets as possible */
 428                                if (send_len > 0) {
 429                                        result = uml_vector_sendmmsg(
 430                                                 vp->fds->tx_fd,
 431                                                 send_from,
 432                                                 send_len,
 433                                                 0
 434                                        );
 435                                        vp->in_write_poll =
 436                                                (result != send_len);
 437                                }
 438                                /* For some of the sendmmsg error scenarios
  439                                 * we may end up being unsure of the TX success
 440                                 * for all packets. It is safer to declare
 441                                 * them all TX-ed and blame the network.
 442                                 */
 443                                if (result < 0) {
 444                                        if (net_ratelimit())
 445                                                netdev_err(vp->dev, "sendmmsg err=%i\n",
 446                                                        result);
 447                                        vp->in_error = true;
 448                                        result = send_len;
 449                                }
 450                                if (result > 0) {
 451                                        queue_depth =
 452                                                consume_vector_skbs(qi, result);
  453                                        /* This is equivalent to a TX IRQ.
 454                                         * Restart the upper layers to feed us
 455                                         * more packets.
 456                                         */
 457                                        if (result > vp->estats.tx_queue_max)
 458                                                vp->estats.tx_queue_max = result;
 459                                        vp->estats.tx_queue_running_average =
 460                                                (vp->estats.tx_queue_running_average + result) >> 1;
 461                                }
 462                                netif_trans_update(qi->dev);
 463                                netif_wake_queue(qi->dev);
 464                                /* if TX is busy, break out of the send loop,
 465                                 *  poll write IRQ will reschedule xmit for us
 466                                 */
 467                                if (result != send_len) {
 468                                        vp->estats.tx_restart_queue++;
 469                                        break;
 470                                }
 471                        }
 472                }
 473                spin_unlock(&qi->head_lock);
 474        } else {
 475                tasklet_schedule(&vp->tx_poll);
 476        }
 477        return queue_depth;
 478}
 479
 480/* Queue destructor. Deliberately stateless so we can use
 481 * it in queue cleanup if initialization fails.
 482 */
 483
 484static void destroy_queue(struct vector_queue *qi)
 485{
 486        int i;
 487        struct iovec *iov;
 488        struct vector_private *vp = netdev_priv(qi->dev);
 489        struct mmsghdr *mmsg_vector;
 490
 491        if (qi == NULL)
 492                return;
 493        /* deallocate any skbuffs - we rely on any unused to be
 494         * set to NULL.
 495         */
 496        if (qi->skbuff_vector != NULL) {
 497                for (i = 0; i < qi->max_depth; i++) {
 498                        if (*(qi->skbuff_vector + i) != NULL)
 499                                dev_kfree_skb_any(*(qi->skbuff_vector + i));
 500                }
 501                kfree(qi->skbuff_vector);
 502        }
 503        /* deallocate matching IOV structures including header buffs */
 504        if (qi->mmsg_vector != NULL) {
 505                mmsg_vector = qi->mmsg_vector;
 506                for (i = 0; i < qi->max_depth; i++) {
 507                        iov = mmsg_vector->msg_hdr.msg_iov;
 508                        if (iov != NULL) {
 509                                if ((vp->header_size > 0) &&
 510                                        (iov->iov_base != NULL))
 511                                        kfree(iov->iov_base);
 512                                kfree(iov);
 513                        }
 514                        mmsg_vector++;
 515                }
 516                kfree(qi->mmsg_vector);
 517        }
 518        kfree(qi);
 519}
 520
 521/*
  522 * Queue constructor. Create a queue with a given size.
 523 */
 524static struct vector_queue *create_queue(
 525        struct vector_private *vp,
 526        int max_size,
 527        int header_size,
 528        int num_extra_frags)
 529{
 530        struct vector_queue *result;
 531        int i;
 532        struct iovec *iov;
 533        struct mmsghdr *mmsg_vector;
 534
 535        result = kmalloc(sizeof(struct vector_queue), GFP_KERNEL);
 536        if (result == NULL)
 537                return NULL;
 538        result->max_depth = max_size;
 539        result->dev = vp->dev;
 540        result->mmsg_vector = kmalloc(
 541                (sizeof(struct mmsghdr) * max_size), GFP_KERNEL);
 542        if (result->mmsg_vector == NULL)
 543                goto out_mmsg_fail;
 544        result->skbuff_vector = kmalloc(
 545                (sizeof(void *) * max_size), GFP_KERNEL);
 546        if (result->skbuff_vector == NULL)
 547                goto out_skb_fail;
 548
  549        /* further failures can be handled safely by destroy_queue */
 550
 551        mmsg_vector = result->mmsg_vector;
 552        for (i = 0; i < max_size; i++) {
 553                /* Clear all pointers - we use non-NULL as marking on
 554                 * what to free on destruction
 555                 */
 556                *(result->skbuff_vector + i) = NULL;
 557                mmsg_vector->msg_hdr.msg_iov = NULL;
 558                mmsg_vector++;
 559        }
 560        mmsg_vector = result->mmsg_vector;
 561        result->max_iov_frags = num_extra_frags;
 562        for (i = 0; i < max_size; i++) {
 563                if (vp->header_size > 0)
 564                        iov = kmalloc_array(3 + num_extra_frags,
 565                                            sizeof(struct iovec),
 566                                            GFP_KERNEL
 567                        );
 568                else
 569                        iov = kmalloc_array(2 + num_extra_frags,
 570                                            sizeof(struct iovec),
 571                                            GFP_KERNEL
 572                        );
 573                if (iov == NULL)
 574                        goto out_fail;
 575                mmsg_vector->msg_hdr.msg_iov = iov;
 576                mmsg_vector->msg_hdr.msg_iovlen = 1;
 577                mmsg_vector->msg_hdr.msg_control = NULL;
 578                mmsg_vector->msg_hdr.msg_controllen = 0;
 579                mmsg_vector->msg_hdr.msg_flags = MSG_DONTWAIT;
 580                mmsg_vector->msg_hdr.msg_name = NULL;
 581                mmsg_vector->msg_hdr.msg_namelen = 0;
 582                if (vp->header_size > 0) {
 583                        iov->iov_base = kmalloc(header_size, GFP_KERNEL);
 584                        if (iov->iov_base == NULL)
 585                                goto out_fail;
 586                        iov->iov_len = header_size;
 587                        mmsg_vector->msg_hdr.msg_iovlen = 2;
 588                        iov++;
 589                }
 590                iov->iov_base = NULL;
 591                iov->iov_len = 0;
 592                mmsg_vector++;
 593        }
 594        spin_lock_init(&result->head_lock);
 595        spin_lock_init(&result->tail_lock);
 596        result->queue_depth = 0;
 597        result->head = 0;
 598        result->tail = 0;
 599        return result;
 600out_skb_fail:
 601        kfree(result->mmsg_vector);
 602out_mmsg_fail:
 603        kfree(result);
 604        return NULL;
 605out_fail:
 606        destroy_queue(result);
 607        return NULL;
 608}
 609
 610/*
 611 * We do not use the RX queue as a proper wraparound queue for now
 612 * This is not necessary because the consumption via netif_rx()
 613 * happens in-line. While we can try using the return code of
  614 * netif_rx() for flow control, there are no drivers doing this today.
 615 * For this RX specific use we ignore the tail/head locks and
 616 * just read into a prepared queue filled with skbuffs.
 617 */
 618
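     /* Allocate an skb sized to the current request size and point the
      * msghdr iovec at its linear part and page fragments so that
      * recvmsg/recvmmsg can receive straight into it.
      */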
 619static struct sk_buff *prep_skb(
 620        struct vector_private *vp,
 621        struct user_msghdr *msg)
 622{
 623        int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN;
 624        struct sk_buff *result;
 625        int iov_index = 0, len;
 626        struct iovec *iov = msg->msg_iov;
 627        int err, nr_frags, frag;
 628        skb_frag_t *skb_frag;
 629
 630        if (vp->req_size <= linear)
 631                len = linear;
 632        else
 633                len = vp->req_size;
 634        result = alloc_skb_with_frags(
 635                linear,
 636                len - vp->max_packet,
 637                3,
 638                &err,
 639                GFP_ATOMIC
 640        );
 641        if (vp->header_size > 0)
 642                iov_index++;
 643        if (result == NULL) {
 644                iov[iov_index].iov_base = NULL;
 645                iov[iov_index].iov_len = 0;
 646                goto done;
 647        }
 648        skb_reserve(result, vp->headroom);
 649        result->dev = vp->dev;
 650        skb_put(result, vp->max_packet);
 651        result->data_len = len - vp->max_packet;
 652        result->len += len - vp->max_packet;
 653        skb_reset_mac_header(result);
 654        result->ip_summed = CHECKSUM_NONE;
 655        iov[iov_index].iov_base = result->data;
 656        iov[iov_index].iov_len = vp->max_packet;
 657        iov_index++;
 658
 659        nr_frags = skb_shinfo(result)->nr_frags;
 660        for (frag = 0; frag < nr_frags; frag++) {
 661                skb_frag = &skb_shinfo(result)->frags[frag];
 662                iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
 663                if (iov[iov_index].iov_base != NULL)
 664                        iov[iov_index].iov_len = skb_frag_size(skb_frag);
 665                else
 666                        iov[iov_index].iov_len = 0;
 667                iov_index++;
 668        }
 669done:
 670        msg->msg_iovlen = iov_index;
 671        return result;
 672}
 673
 674
 675/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/
 676
 677static void prep_queue_for_rx(struct vector_queue *qi)
 678{
 679        struct vector_private *vp = netdev_priv(qi->dev);
 680        struct mmsghdr *mmsg_vector = qi->mmsg_vector;
 681        void **skbuff_vector = qi->skbuff_vector;
 682        int i;
 683
 684        if (qi->queue_depth == 0)
 685                return;
 686        for (i = 0; i < qi->queue_depth; i++) {
 687                /* it is OK if allocation fails - recvmmsg with NULL data in
 688                 * iov argument still performs an RX, just drops the packet
  689                 * This allows us to stop faffing around with a "drop buffer"
 690                 */
 691
 692                *skbuff_vector = prep_skb(vp, &mmsg_vector->msg_hdr);
 693                skbuff_vector++;
 694                mmsg_vector++;
 695        }
 696        qi->queue_depth = 0;
 697}
 698
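     /* Look up the vector device for a given unit number, NULL if none. */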
 699static struct vector_device *find_device(int n)
 700{
 701        struct vector_device *device;
 702        struct list_head *ele;
 703
 704        spin_lock(&vector_devices_lock);
 705        list_for_each(ele, &vector_devices) {
 706                device = list_entry(ele, struct vector_device, list);
 707                if (device->unit == n)
 708                        goto out;
 709        }
 710        device = NULL;
 711 out:
 712        spin_unlock(&vector_devices_lock);
 713        return device;
 714}
 715
 716static int vector_parse(char *str, int *index_out, char **str_out,
 717                        char **error_out)
 718{
 719        int n, len, err;
 720        char *start = str;
 721
 722        len = strlen(str);
 723
 724        while ((*str != ':') && (strlen(str) > 1))
 725                str++;
 726        if (*str != ':') {
 727                *error_out = "Expected ':' after device number";
 728                return -EINVAL;
 729        }
 730        *str = '\0';
 731
 732        err = kstrtouint(start, 0, &n);
 733        if (err < 0) {
 734                *error_out = "Bad device number";
 735                return err;
 736        }
 737
 738        str++;
 739        if (find_device(n)) {
 740                *error_out = "Device already configured";
 741                return -EINVAL;
 742        }
 743
 744        *index_out = n;
 745        *str_out = str;
 746        return 0;
 747}
 748
 749static int vector_config(char *str, char **error_out)
 750{
 751        int err, n;
 752        char *params;
 753        struct arglist *parsed;
 754
 755        err = vector_parse(str, &n, &params, error_out);
 756        if (err != 0)
 757                return err;
 758
 759        /* This string is broken up and the pieces used by the underlying
 760         * driver. We should copy it to make sure things do not go wrong
 761         * later.
 762         */
 763
 764        params = kstrdup(params, GFP_KERNEL);
 765        if (params == NULL) {
 766                *error_out = "vector_config failed to strdup string";
 767                return -ENOMEM;
 768        }
 769
 770        parsed = uml_parse_vector_ifspec(params);
 771
 772        if (parsed == NULL) {
 773                *error_out = "vector_config failed to parse parameters";
 774                return -EINVAL;
 775        }
 776
 777        vector_eth_configure(n, parsed);
 778        return 0;
 779}
 780
 781static int vector_id(char **str, int *start_out, int *end_out)
 782{
 783        char *end;
 784        int n;
 785
 786        n = simple_strtoul(*str, &end, 0);
 787        if ((*end != '\0') || (end == *str))
 788                return -1;
 789
 790        *start_out = n;
 791        *end_out = n;
 792        *str = end;
 793        return n;
 794}
 795
 796static int vector_remove(int n, char **error_out)
 797{
 798        struct vector_device *vec_d;
 799        struct net_device *dev;
 800        struct vector_private *vp;
 801
 802        vec_d = find_device(n);
 803        if (vec_d == NULL)
 804                return -ENODEV;
 805        dev = vec_d->dev;
 806        vp = netdev_priv(dev);
 807        if (vp->fds != NULL)
 808                return -EBUSY;
 809        unregister_netdev(dev);
 810        platform_device_unregister(&vec_d->pdev);
 811        return 0;
 812}
 813
 814/*
 815 * There is no shared per-transport initialization code, so
 816 * we will just initialize each interface one by one and
 817 * add them to a list
 818 */
 819
 820static struct platform_driver uml_net_driver = {
 821        .driver = {
 822                .name = DRIVER_NAME,
 823        },
 824};
 825
 826
 827static void vector_device_release(struct device *dev)
 828{
 829        struct vector_device *device = dev_get_drvdata(dev);
 830        struct net_device *netdev = device->dev;
 831
 832        list_del(&device->list);
 833        kfree(device);
 834        free_netdev(netdev);
 835}
 836
 837/* Bog standard recv using recvmsg - not used normally unless the user
 838 * explicitly specifies not to use recvmmsg vector RX.
 839 */
 840
 841static int vector_legacy_rx(struct vector_private *vp)
 842{
 843        int pkt_len;
 844        struct user_msghdr hdr;
 845        struct iovec iov[2 + MAX_IOV_SIZE]; /* header + data use case only */
 846        int iovpos = 0;
 847        struct sk_buff *skb;
 848        int header_check;
 849
 850        hdr.msg_name = NULL;
 851        hdr.msg_namelen = 0;
 852        hdr.msg_iov = (struct iovec *) &iov;
 853        hdr.msg_control = NULL;
 854        hdr.msg_controllen = 0;
 855        hdr.msg_flags = 0;
 856
 857        if (vp->header_size > 0) {
 858                iov[0].iov_base = vp->header_rxbuffer;
 859                iov[0].iov_len = vp->header_size;
 860        }
 861
 862        skb = prep_skb(vp, &hdr);
 863
 864        if (skb == NULL) {
 865                /* Read a packet into drop_buffer and don't do
 866                 * anything with it.
 867                 */
 868                iov[iovpos].iov_base = drop_buffer;
 869                iov[iovpos].iov_len = DROP_BUFFER_SIZE;
 870                hdr.msg_iovlen = 1;
 871                vp->dev->stats.rx_dropped++;
 872        }
 873
 874        pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0);
 875        if (pkt_len < 0) {
 876                vp->in_error = true;
 877                return pkt_len;
 878        }
 879
 880        if (skb != NULL) {
 881                if (pkt_len > vp->header_size) {
 882                        if (vp->header_size > 0) {
 883                                header_check = vp->verify_header(
 884                                        vp->header_rxbuffer, skb, vp);
 885                                if (header_check < 0) {
 886                                        dev_kfree_skb_irq(skb);
 887                                        vp->dev->stats.rx_dropped++;
 888                                        vp->estats.rx_encaps_errors++;
 889                                        return 0;
 890                                }
 891                                if (header_check > 0) {
 892                                        vp->estats.rx_csum_offload_good++;
 893                                        skb->ip_summed = CHECKSUM_UNNECESSARY;
 894                                }
 895                        }
 896                        pskb_trim(skb, pkt_len - vp->rx_header_size);
 897                        skb->protocol = eth_type_trans(skb, skb->dev);
 898                        vp->dev->stats.rx_bytes += skb->len;
 899                        vp->dev->stats.rx_packets++;
 900                        netif_rx(skb);
 901                } else {
 902                        dev_kfree_skb_irq(skb);
 903                }
 904        }
 905        return pkt_len;
 906}
 907
 908/*
 909 * Packet at a time TX which falls back to vector TX if the
 910 * underlying transport is busy.
 911 */
 912
 913
 914
 915static int writev_tx(struct vector_private *vp, struct sk_buff *skb)
 916{
 917        struct iovec iov[3 + MAX_IOV_SIZE];
 918        int iov_count, pkt_len = 0;
 919
 920        iov[0].iov_base = vp->header_txbuffer;
 921        iov_count = prep_msg(vp, skb, (struct iovec *) &iov);
 922
 923        if (iov_count < 1)
 924                goto drop;
 925
 926        pkt_len = uml_vector_writev(
 927                vp->fds->tx_fd,
 928                (struct iovec *) &iov,
 929                iov_count
 930        );
 931
 932        if (pkt_len < 0)
 933                goto drop;
 934
 935        netif_trans_update(vp->dev);
 936        netif_wake_queue(vp->dev);
 937
 938        if (pkt_len > 0) {
 939                vp->dev->stats.tx_bytes += skb->len;
 940                vp->dev->stats.tx_packets++;
 941        } else {
 942                vp->dev->stats.tx_dropped++;
 943        }
 944        consume_skb(skb);
 945        return pkt_len;
 946drop:
 947        vp->dev->stats.tx_dropped++;
 948        consume_skb(skb);
 949        if (pkt_len < 0)
 950                vp->in_error = true;
 951        return pkt_len;
 952}
 953
 954/*
 955 * Receive as many messages as we can in one call using the special
 956 * mmsg vector matched to an skb vector which we prepared earlier.
 957 */
 958
 959static int vector_mmsg_rx(struct vector_private *vp)
 960{
 961        int packet_count, i;
 962        struct vector_queue *qi = vp->rx_queue;
 963        struct sk_buff *skb;
 964        struct mmsghdr *mmsg_vector = qi->mmsg_vector;
 965        void **skbuff_vector = qi->skbuff_vector;
 966        int header_check;
 967
 968        /* Refresh the vector and make sure it is with new skbs and the
 969         * iovs are updated to point to them.
 970         */
 971
 972        prep_queue_for_rx(qi);
 973
 974        /* Fire the Lazy Gun - get as many packets as we can in one go. */
 975
 976        packet_count = uml_vector_recvmmsg(
 977                vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0);
 978
 979        if (packet_count < 0)
 980                vp->in_error = true;
 981
 982        if (packet_count <= 0)
 983                return packet_count;
 984
  985        /* We treat packet processing as enqueue and buffer refresh as dequeue.
  986         * The queue_depth tells us how many buffers have been used and how
  987         * many we need to prep the next time prep_queue_for_rx() is called.
 988         */
 989
 990        qi->queue_depth = packet_count;
 991
 992        for (i = 0; i < packet_count; i++) {
 993                skb = (*skbuff_vector);
 994                if (mmsg_vector->msg_len > vp->header_size) {
 995                        if (vp->header_size > 0) {
 996                                header_check = vp->verify_header(
 997                                        mmsg_vector->msg_hdr.msg_iov->iov_base,
 998                                        skb,
 999                                        vp
1000                                );
1001                                if (header_check < 0) {
1002                                /* Overlay header failed to verify - discard.
1003                                 * We can actually keep this skb and reuse it,
1004                                 * but that will make the prep logic too
1005                                 * complex.
1006                                 */
1007                                        dev_kfree_skb_irq(skb);
1008                                        vp->estats.rx_encaps_errors++;
1009                                        continue;
1010                                }
1011                                if (header_check > 0) {
1012                                        vp->estats.rx_csum_offload_good++;
1013                                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1014                                }
1015                        }
1016                        pskb_trim(skb,
1017                                mmsg_vector->msg_len - vp->rx_header_size);
1018                        skb->protocol = eth_type_trans(skb, skb->dev);
1019                        /*
1020                         * We do not need to lock on updating stats here
1021                         * The interrupt loop is non-reentrant.
1022                         */
1023                        vp->dev->stats.rx_bytes += skb->len;
1024                        vp->dev->stats.rx_packets++;
1025                        netif_rx(skb);
1026                } else {
1027                        /* Overlay header too short to do anything - discard.
1028                         * We can actually keep this skb and reuse it,
1029                         * but that will make the prep logic too complex.
1030                         */
1031                        if (skb != NULL)
1032                                dev_kfree_skb_irq(skb);
1033                }
1034                (*skbuff_vector) = NULL;
1035                /* Move to the next buffer element */
1036                mmsg_vector++;
1037                skbuff_vector++;
1038        }
1039        if (packet_count > 0) {
1040                if (vp->estats.rx_queue_max < packet_count)
1041                        vp->estats.rx_queue_max = packet_count;
1042                vp->estats.rx_queue_running_average =
1043                        (vp->estats.rx_queue_running_average + packet_count) >> 1;
1044        }
1045        return packet_count;
1046}
1047
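     /* Drain the RX file descriptor using the vectored or legacy receive
      * path, bounded by MAX_ITERATIONS so one device cannot spin here forever.
      */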
1048static void vector_rx(struct vector_private *vp)
1049{
1050        int err;
1051        int iter = 0;
1052
1053        if ((vp->options & VECTOR_RX) > 0)
1054                while (((err = vector_mmsg_rx(vp)) > 0) && (iter < MAX_ITERATIONS))
1055                        iter++;
1056        else
1057                while (((err = vector_legacy_rx(vp)) > 0) && (iter < MAX_ITERATIONS))
1058                        iter++;
1059        if ((err != 0) && net_ratelimit())
1060                netdev_err(vp->dev, "vector_rx: error(%d)\n", err);
1061        if (iter == MAX_ITERATIONS)
1062                netdev_err(vp->dev, "vector_rx: device stuck, remote end may have closed the connection\n");
1063}
1064
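     /* ndo_start_xmit: use packet-at-a-time writev TX when vector TX is
      * disabled; otherwise enqueue the skb and either flush immediately
      * (queue nearly full or small packet) or defer to the coalescing
      * timer / TX tasklet.
      */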
1065static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
1066{
1067        struct vector_private *vp = netdev_priv(dev);
1068        int queue_depth = 0;
1069
1070        if (vp->in_error) {
1071                deactivate_fd(vp->fds->rx_fd, vp->rx_irq);
1072                if ((vp->fds->rx_fd != vp->fds->tx_fd) && (vp->tx_irq != 0))
1073                        deactivate_fd(vp->fds->tx_fd, vp->tx_irq);
1074                return NETDEV_TX_BUSY;
1075        }
1076
1077        if ((vp->options & VECTOR_TX) == 0) {
1078                writev_tx(vp, skb);
1079                return NETDEV_TX_OK;
1080        }
1081
1082        /* We do BQL only in the vector path, no point doing it in
1083         * packet at a time mode as there is no device queue
1084         */
1085
1086        netdev_sent_queue(vp->dev, skb->len);
1087        queue_depth = vector_enqueue(vp->tx_queue, skb);
1088
1089        /* if the device queue is full, stop the upper layers and
1090         * flush it.
1091         */
1092
1093        if (queue_depth >= vp->tx_queue->max_depth - 1) {
1094                vp->estats.tx_kicks++;
1095                netif_stop_queue(dev);
1096                vector_send(vp->tx_queue);
1097                return NETDEV_TX_OK;
1098        }
1099        if (netdev_xmit_more()) {
1100                mod_timer(&vp->tl, vp->coalesce);
1101                return NETDEV_TX_OK;
1102        }
1103        if (skb->len < TX_SMALL_PACKET) {
1104                vp->estats.tx_kicks++;
1105                vector_send(vp->tx_queue);
1106        } else
1107                tasklet_schedule(&vp->tx_poll);
1108        return NETDEV_TX_OK;
1109}
1110
1111static irqreturn_t vector_rx_interrupt(int irq, void *dev_id)
1112{
1113        struct net_device *dev = dev_id;
1114        struct vector_private *vp = netdev_priv(dev);
1115
1116        if (!netif_running(dev))
1117                return IRQ_NONE;
1118        vector_rx(vp);
1119        return IRQ_HANDLED;
1120
1121}
1122
1123static irqreturn_t vector_tx_interrupt(int irq, void *dev_id)
1124{
1125        struct net_device *dev = dev_id;
1126        struct vector_private *vp = netdev_priv(dev);
1127
1128        if (!netif_running(dev))
1129                return IRQ_NONE;
1130        /* We need to pay attention to it only if we got
 1131                 * -EAGAIN or -ENOBUFS from sendmmsg. Otherwise
1132         * we ignore it. In the future, it may be worth
1133         * it to improve the IRQ controller a bit to make
1134         * tweaking the IRQ mask less costly
1135         */
1136
1137        if (vp->in_write_poll)
1138                tasklet_schedule(&vp->tx_poll);
1139        return IRQ_HANDLED;
1140
1141}
1142
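     /* Round-robin position for handing out IRQ lines within VECTOR_IRQ_SPACE. */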
1143static int irq_rr;
1144
1145static int vector_net_close(struct net_device *dev)
1146{
1147        struct vector_private *vp = netdev_priv(dev);
1148        unsigned long flags;
1149
1150        netif_stop_queue(dev);
1151        del_timer(&vp->tl);
1152
1153        if (vp->fds == NULL)
1154                return 0;
1155
1156        /* Disable and free all IRQS */
1157        if (vp->rx_irq > 0) {
1158                um_free_irq(vp->rx_irq, dev);
1159                vp->rx_irq = 0;
1160        }
1161        if (vp->tx_irq > 0) {
1162                um_free_irq(vp->tx_irq, dev);
1163                vp->tx_irq = 0;
1164        }
1165        tasklet_kill(&vp->tx_poll);
1166        if (vp->fds->rx_fd > 0) {
1167                if (vp->bpf)
1168                        uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf);
1169                os_close_file(vp->fds->rx_fd);
1170                vp->fds->rx_fd = -1;
1171        }
1172        if (vp->fds->tx_fd > 0) {
1173                os_close_file(vp->fds->tx_fd);
1174                vp->fds->tx_fd = -1;
1175        }
1176        if (vp->bpf != NULL)
1177                kfree(vp->bpf->filter);
1178        kfree(vp->bpf);
1179        vp->bpf = NULL;
1180        kfree(vp->fds->remote_addr);
1181        kfree(vp->transport_data);
1182        kfree(vp->header_rxbuffer);
1183        kfree(vp->header_txbuffer);
1184        if (vp->rx_queue != NULL)
1185                destroy_queue(vp->rx_queue);
1186        if (vp->tx_queue != NULL)
1187                destroy_queue(vp->tx_queue);
1188        kfree(vp->fds);
1189        vp->fds = NULL;
1190        spin_lock_irqsave(&vp->lock, flags);
1191        vp->opened = false;
1192        vp->in_error = false;
1193        spin_unlock_irqrestore(&vp->lock, flags);
1194        return 0;
1195}
1196
1197/* TX tasklet */
1198
1199static void vector_tx_poll(unsigned long data)
1200{
1201        struct vector_private *vp = (struct vector_private *)data;
1202
1203        vp->estats.tx_kicks++;
1204        vector_send(vp->tx_queue);
1205}
1206static void vector_reset_tx(struct work_struct *work)
1207{
1208        struct vector_private *vp =
1209                container_of(work, struct vector_private, reset_tx);
1210        netdev_reset_queue(vp->dev);
1211        netif_start_queue(vp->dev);
1212        netif_wake_queue(vp->dev);
1213}
1214
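     /* ndo_open: open the host file descriptors, allocate the RX/TX queues
      * or their single-packet fallback buffers, request the IRQs and attach
      * the BPF filter. Any failure unwinds through vector_net_close().
      */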
1215static int vector_net_open(struct net_device *dev)
1216{
1217        struct vector_private *vp = netdev_priv(dev);
1218        unsigned long flags;
1219        int err = -EINVAL;
1220        struct vector_device *vdevice;
1221
1222        spin_lock_irqsave(&vp->lock, flags);
1223        if (vp->opened) {
1224                spin_unlock_irqrestore(&vp->lock, flags);
1225                return -ENXIO;
1226        }
1227        vp->opened = true;
1228        spin_unlock_irqrestore(&vp->lock, flags);
1229
1230        vp->bpf = uml_vector_user_bpf(get_bpf_file(vp->parsed));
1231
1232        vp->fds = uml_vector_user_open(vp->unit, vp->parsed);
1233
1234        if (vp->fds == NULL)
1235                goto out_close;
1236
1237        if (build_transport_data(vp) < 0)
1238                goto out_close;
1239
1240        if ((vp->options & VECTOR_RX) > 0) {
1241                vp->rx_queue = create_queue(
1242                        vp,
1243                        get_depth(vp->parsed),
1244                        vp->rx_header_size,
1245                        MAX_IOV_SIZE
1246                );
1247                vp->rx_queue->queue_depth = get_depth(vp->parsed);
1248        } else {
1249                vp->header_rxbuffer = kmalloc(
1250                        vp->rx_header_size,
1251                        GFP_KERNEL
1252                );
1253                if (vp->header_rxbuffer == NULL)
1254                        goto out_close;
1255        }
1256        if ((vp->options & VECTOR_TX) > 0) {
1257                vp->tx_queue = create_queue(
1258                        vp,
1259                        get_depth(vp->parsed),
1260                        vp->header_size,
1261                        MAX_IOV_SIZE
1262                );
1263        } else {
1264                vp->header_txbuffer = kmalloc(vp->header_size, GFP_KERNEL);
1265                if (vp->header_txbuffer == NULL)
1266                        goto out_close;
1267        }
1268
1269        /* READ IRQ */
1270        err = um_request_irq(
1271                irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd,
1272                        IRQ_READ, vector_rx_interrupt,
1273                        IRQF_SHARED, dev->name, dev);
1274        if (err != 0) {
1275                netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err);
1276                err = -ENETUNREACH;
1277                goto out_close;
1278        }
1279        vp->rx_irq = irq_rr + VECTOR_BASE_IRQ;
1280        dev->irq = irq_rr + VECTOR_BASE_IRQ;
1281        irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
1282
1283        /* WRITE IRQ - we need it only if we have vector TX */
1284        if ((vp->options & VECTOR_TX) > 0) {
1285                err = um_request_irq(
1286                        irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd,
1287                                IRQ_WRITE, vector_tx_interrupt,
1288                                IRQF_SHARED, dev->name, dev);
1289                if (err != 0) {
1290                        netdev_err(dev,
1291                                "vector_open: failed to get tx irq(%d)\n", err);
1292                        err = -ENETUNREACH;
1293                        goto out_close;
1294                }
1295                vp->tx_irq = irq_rr + VECTOR_BASE_IRQ;
1296                irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
1297        }
1298
1299        if ((vp->options & VECTOR_QDISC_BYPASS) != 0) {
1300                if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd))
1301                        vp->options |= VECTOR_BPF;
1302        }
1303        if (((vp->options & VECTOR_BPF) != 0) && (vp->bpf == NULL))
1304                vp->bpf = uml_vector_default_bpf(dev->dev_addr);
1305
1306        if (vp->bpf != NULL)
1307                uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);
1308
1309        netif_start_queue(dev);
1310
1311        /* clear buffer - it can happen that the host side of the interface
1312         * is full when we get here. In this case, new data is never queued,
1313         * SIGIOs never arrive, and the net never works.
1314         */
1315
1316        vector_rx(vp);
1317
1318        vector_reset_stats(vp);
1319        vdevice = find_device(vp->unit);
1320        vdevice->opened = 1;
1321
1322        if ((vp->options & VECTOR_TX) != 0)
1323                add_timer(&vp->tl);
1324        return 0;
1325out_close:
1326        vector_net_close(dev);
1327        return err;
1328}
1329
1330
1331static void vector_net_set_multicast_list(struct net_device *dev)
1332{
1333        /* TODO: - we can do some BPF games here */
1334        return;
1335}
1336
1337static void vector_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
1338{
1339        struct vector_private *vp = netdev_priv(dev);
1340
1341        vp->estats.tx_timeout_count++;
1342        netif_trans_update(dev);
1343        schedule_work(&vp->reset_tx);
1344}
1345
1346static netdev_features_t vector_fix_features(struct net_device *dev,
1347        netdev_features_t features)
1348{
1349        features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
1350        return features;
1351}
1352
1353static int vector_set_features(struct net_device *dev,
1354        netdev_features_t features)
1355{
1356        struct vector_private *vp = netdev_priv(dev);
1357        /* Adjust buffer sizes for GSO/GRO. Unfortunately, there is
1358         * no way to negotiate it on raw sockets, so we can change
1359         * only our side.
1360         */
1361        if (features & NETIF_F_GRO)
1362                /* All new frame buffers will be GRO-sized */
1363                vp->req_size = 65536;
1364        else
1365                /* All new frame buffers will be normal sized */
1366                vp->req_size = vp->max_packet + vp->headroom + SAFETY_MARGIN;
1367        return 0;
1368}
1369
1370#ifdef CONFIG_NET_POLL_CONTROLLER
1371static void vector_net_poll_controller(struct net_device *dev)
1372{
1373        disable_irq(dev->irq);
1374        vector_rx_interrupt(dev->irq, dev);
1375        enable_irq(dev->irq);
1376}
1377#endif
1378
1379static void vector_net_get_drvinfo(struct net_device *dev,
1380                                struct ethtool_drvinfo *info)
1381{
1382        strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver));
1383}
1384
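     /* ethtool ->flash_device handler: load a classic BPF program through
      * the firmware interface and attach it to the RX socket. Only allowed
      * when the "bpfflash" option was given on the command line.
      */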
1385static int vector_net_load_bpf_flash(struct net_device *dev,
1386                                struct ethtool_flash *efl)
1387{
1388        struct vector_private *vp = netdev_priv(dev);
1389        struct vector_device *vdevice;
1390        const struct firmware *fw;
1391        int result = 0;
1392
1393        if (!(vp->options & VECTOR_BPF_FLASH)) {
1394                netdev_err(dev, "loading firmware not permitted: %s\n", efl->data);
1395                return -1;
1396        }
1397
1398        spin_lock(&vp->lock);
1399
1400        if (vp->bpf != NULL) {
1401                if (vp->opened)
1402                        uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf);
1403                kfree(vp->bpf->filter);
1404                vp->bpf->filter = NULL;
1405        } else {
1406                vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_KERNEL);
1407                if (vp->bpf == NULL) {
1408                        netdev_err(dev, "failed to allocate memory for firmware\n");
1409                        goto flash_fail;
1410                }
1411        }
1412
1413        vdevice = find_device(vp->unit);
1414
1415        if (request_firmware(&fw, efl->data, &vdevice->pdev.dev))
1416                goto flash_fail;
1417
1418        vp->bpf->filter = kmemdup(fw->data, fw->size, GFP_KERNEL);
1419        if (!vp->bpf->filter)
1420                goto free_buffer;
1421
1422        vp->bpf->len = fw->size / sizeof(struct sock_filter);
1423        release_firmware(fw);
1424
1425        if (vp->opened)
1426                result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);
1427
1428        spin_unlock(&vp->lock);
1429
1430        return result;
1431
1432free_buffer:
1433        release_firmware(fw);
1434
1435flash_fail:
1436        spin_unlock(&vp->lock);
1437        if (vp->bpf != NULL)
1438                kfree(vp->bpf->filter);
1439        kfree(vp->bpf);
1440        vp->bpf = NULL;
1441        return -1;
1442}
1443
1444static void vector_get_ringparam(struct net_device *netdev,
1445                                struct ethtool_ringparam *ring)
1446{
1447        struct vector_private *vp = netdev_priv(netdev);
1448
1449        ring->rx_max_pending = vp->rx_queue->max_depth;
1450        ring->tx_max_pending = vp->tx_queue->max_depth;
1451        ring->rx_pending = vp->rx_queue->max_depth;
1452        ring->tx_pending = vp->tx_queue->max_depth;
1453}
1454
1455static void vector_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
1456{
1457        switch (stringset) {
1458        case ETH_SS_TEST:
1459                *buf = '\0';
1460                break;
1461        case ETH_SS_STATS:
1462                memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
1463                break;
1464        default:
1465                WARN_ON(1);
1466                break;
1467        }
1468}
1469
1470static int vector_get_sset_count(struct net_device *dev, int sset)
1471{
1472        switch (sset) {
1473        case ETH_SS_TEST:
1474                return 0;
1475        case ETH_SS_STATS:
1476                return VECTOR_NUM_STATS;
1477        default:
1478                return -EOPNOTSUPP;
1479        }
1480}
1481
1482static void vector_get_ethtool_stats(struct net_device *dev,
1483        struct ethtool_stats *estats,
1484        u64 *tmp_stats)
1485{
1486        struct vector_private *vp = netdev_priv(dev);
1487
1488        memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
1489}
1490
1491static int vector_get_coalesce(struct net_device *netdev,
1492                                        struct ethtool_coalesce *ec)
1493{
1494        struct vector_private *vp = netdev_priv(netdev);
1495
1496        ec->tx_coalesce_usecs = (vp->coalesce * 1000000) / HZ;
1497        return 0;
1498}
1499
1500static int vector_set_coalesce(struct net_device *netdev,
1501                                        struct ethtool_coalesce *ec)
1502{
1503        struct vector_private *vp = netdev_priv(netdev);
1504
1505        vp->coalesce = (ec->tx_coalesce_usecs * HZ) / 1000000;
1506        if (vp->coalesce == 0)
1507                vp->coalesce = 1;
1508        return 0;
1509}
1510
1511static const struct ethtool_ops vector_net_ethtool_ops = {
1512        .supported_coalesce_params = ETHTOOL_COALESCE_TX_USECS,
1513        .get_drvinfo    = vector_net_get_drvinfo,
1514        .get_link       = ethtool_op_get_link,
1515        .get_ts_info    = ethtool_op_get_ts_info,
1516        .get_ringparam  = vector_get_ringparam,
1517        .get_strings    = vector_get_strings,
1518        .get_sset_count = vector_get_sset_count,
1519        .get_ethtool_stats = vector_get_ethtool_stats,
1520        .get_coalesce   = vector_get_coalesce,
1521        .set_coalesce   = vector_set_coalesce,
1522        .flash_device   = vector_net_load_bpf_flash,
1523};
1524
1525
1526static const struct net_device_ops vector_netdev_ops = {
1527        .ndo_open               = vector_net_open,
1528        .ndo_stop               = vector_net_close,
1529        .ndo_start_xmit         = vector_net_start_xmit,
1530        .ndo_set_rx_mode        = vector_net_set_multicast_list,
1531        .ndo_tx_timeout         = vector_net_tx_timeout,
1532        .ndo_set_mac_address    = eth_mac_addr,
1533        .ndo_validate_addr      = eth_validate_addr,
1534        .ndo_fix_features       = vector_fix_features,
1535        .ndo_set_features       = vector_set_features,
1536#ifdef CONFIG_NET_POLL_CONTROLLER
1537        .ndo_poll_controller = vector_net_poll_controller,
1538#endif
1539};
1540
1541
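     /* TX coalescing timer: flush whatever has been queued so far. */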
1542static void vector_timer_expire(struct timer_list *t)
1543{
1544        struct vector_private *vp = from_timer(vp, t, tl);
1545
1546        vp->estats.tx_kicks++;
1547        vector_send(vp->tx_queue);
1548}
1549
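     /* Allocate and register the net_device and platform device for unit n,
      * using the parsed command line arguments to size buffers and select
      * the transport options.
      */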
1550static void vector_eth_configure(int n, struct arglist *def)
1554{
1555        struct vector_device *device;
1556        struct net_device *dev;
1557        struct vector_private *vp;
1558        int err;
1559
1560        device = kzalloc(sizeof(*device), GFP_KERNEL);
1561        if (device == NULL) {
1562                printk(KERN_ERR "vector_eth_configure: failed to allocate struct "
1563                                 "vector_device\n");
1564                return;
1565        }
1566        dev = alloc_etherdev(sizeof(struct vector_private));
1567        if (dev == NULL) {
1568                printk(KERN_ERR "vector_eth_configure: failed to allocate struct "
1569                                 "net_device for vec%d\n", n);
1570                goto out_free_device;
1571        }
1572
1573        dev->mtu = get_mtu(def);
1574
1575        INIT_LIST_HEAD(&device->list);
1576        device->unit = n;
1577
1578        /* If this name ends up conflicting with an existing registered
1579         * netdevice, that is OK; register_netdev{,ice}() will notice this
1580         * and fail.
1581         */
1582        snprintf(dev->name, sizeof(dev->name), "vec%d", n);
1583        uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
1584        vp = netdev_priv(dev);
1585
1586        /* sysfs register */
1587        if (!driver_registered) {
1588                platform_driver_register(&uml_net_driver);
1589                driver_registered = 1;
1590        }
1591        device->pdev.id = n;
1592        device->pdev.name = DRIVER_NAME;
1593        device->pdev.dev.release = vector_device_release;
1594        dev_set_drvdata(&device->pdev.dev, device);
1595        if (platform_device_register(&device->pdev))
1596                goto out_free_netdev;
1597        SET_NETDEV_DEV(dev, &device->pdev.dev);
1598
1599        device->dev = dev;
1600
1601        *vp = ((struct vector_private)
1602                {
1603                .list                   = LIST_HEAD_INIT(vp->list),
1604                .dev                    = dev,
1605                .unit                   = n,
1606                .options                = get_transport_options(def),
1607                .rx_irq                 = 0,
1608                .tx_irq                 = 0,
1609                .parsed                 = def,
1610                .max_packet             = get_mtu(def) + ETH_HEADER_OTHER,
1611                /* TODO - we need to calculate headroom so that the IP
1612                 * header is 16-byte aligned all the time
1613                 */
1614                .headroom               = get_headroom(def),
1615                .form_header            = NULL,
1616                .verify_header          = NULL,
1617                .header_rxbuffer        = NULL,
1618                .header_txbuffer        = NULL,
1619                .header_size            = 0,
1620                .rx_header_size         = 0,
1621                .rexmit_scheduled       = false,
1622                .opened                 = false,
1623                .transport_data         = NULL,
1624                .in_write_poll          = false,
1625                .coalesce               = 2,
1626                .req_size               = get_req_size(def),
1627                .in_error               = false,
1628                .bpf                    = NULL
1629        });
1630
1631        dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
1632        tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp);
1633        INIT_WORK(&vp->reset_tx, vector_reset_tx);
1634
1635        timer_setup(&vp->tl, vector_timer_expire, 0);
1636        spin_lock_init(&vp->lock);
1637
1638        /* FIXME */
1639        dev->netdev_ops = &vector_netdev_ops;
1640        dev->ethtool_ops = &vector_net_ethtool_ops;
1641        dev->watchdog_timeo = (HZ >> 1);
1642        /* primary IRQ - fixme */
1643        dev->irq = 0; /* we will adjust this once opened */
1644
1645        rtnl_lock();
1646        err = register_netdevice(dev);
1647        rtnl_unlock();
1648        if (err)
1649                goto out_undo_user_init;
1650
1651        spin_lock(&vector_devices_lock);
1652        list_add(&device->list, &vector_devices);
1653        spin_unlock(&vector_devices_lock);
1654
1655        return;
1656
1657out_undo_user_init:
        /* the ->release() callback set above frees dev and device once the pdev goes away */
        platform_device_unregister(&device->pdev);
1658        return;
1659out_free_netdev:
1660        free_netdev(dev);
1661out_free_device:
1662        kfree(device);
1663}
1664
1665
1666
1667
1668/*
1669 * Invoked late in boot: instantiate the devices queued up by vector_setup()
1670 */
1671
1672static int __init vector_init(void)
1673{
1674        struct list_head *ele;
1675        struct vector_cmd_line_arg *def;
1676        struct arglist *parsed;
1677
1678        list_for_each(ele, &vec_cmd_line) {
1679                def = list_entry(ele, struct vector_cmd_line_arg, list);
1680                parsed = uml_parse_vector_ifspec(def->arguments);
1681                if (parsed != NULL)
1682                        vector_eth_configure(def->unit, parsed);
1683        }
1684        return 0;
1685}
1686
1687
1688/* Invoked at initial argument parsing; it only stores the
1689 * arguments until vector_init() is called later in boot.
1690 */
1692
1693static int __init vector_setup(char *str)
1694{
1695        char *error;
1696        int n, err;
1697        struct vector_cmd_line_arg *new;
1698
1699        err = vector_parse(str, &n, &str, &error);
1700        if (err) {
1701                printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n",
1702                                 str, error);
1703                return 1;
1704        }
1705        new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES);
1706        if (!new)
1707                panic("%s: Failed to allocate %zu bytes\n", __func__,
1708                      sizeof(*new));
1709        INIT_LIST_HEAD(&new->list);
1710        new->unit = n;
1711        new->arguments = str;
1712        list_add_tail(&new->list, &vec_cmd_line);
1713        return 1;
1714}
1715
1716__setup("vec", vector_setup);
1717__uml_help(vector_setup,
1718"vec[0-9]+:<option>=<value>,<option>=<value>\n"
1719"        Configure a vector io network device.\n\n"
1720);
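/*
 * Example command line (options other than "mac" depend on the chosen
 * transport and are illustrative only):
 *   vec0:transport=tap,ifname=tap0,mac=52:54:00:12:34:56
 */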
1721
1722late_initcall(vector_init);
1723
1724static struct mc_device vector_mc = {
1725        .list           = LIST_HEAD_INIT(vector_mc.list),
1726        .name           = "vec",
1727        .config         = vector_config,
1728        .get_config     = NULL,
1729        .id             = vector_id,
1730        .remove         = vector_remove,
1731};
1732
1733#ifdef CONFIG_INET
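/*
 * Placeholder inetaddr notifier: it is registered below so that address
 * events reach the driver, but every event is currently ignored and
 * NOTIFY_DONE is returned.
 */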
1734static int vector_inetaddr_event(
1735        struct notifier_block *this,
1736        unsigned long event,
1737        void *ptr)
1738{
1739        return NOTIFY_DONE;
1740}
1741
1742static struct notifier_block vector_inetaddr_notifier = {
1743        .notifier_call          = vector_inetaddr_event,
1744};
1745
1746static void inet_register(void)
1747{
1748        register_inetaddr_notifier(&vector_inetaddr_notifier);
1749}
1750#else
1751static inline void inet_register(void)
1752{
1753}
1754#endif
1755
1756static int vector_net_init(void)
1757{
1758        mconsole_register_dev(&vector_mc);
1759        inet_register();
1760        return 0;
1761}
1762
1763__initcall(vector_net_init);
1764
1765
1766
1767