linux/drivers/net/veth.c
<<
>>
Prefs
   1/*
   2 *  drivers/net/veth.c
   3 *
   4 *  Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc
   5 *
   6 * Author: Pavel Emelianov <xemul@openvz.org>
   7 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com>
   8 *
   9 */
  10
  11#include <linux/netdevice.h>
  12#include <linux/slab.h>
  13#include <linux/ethtool.h>
  14#include <linux/etherdevice.h>
  15#include <linux/u64_stats_sync.h>
  16
  17#include <net/dst.h>
  18#include <net/xfrm.h>
  19#include <linux/veth.h>
  20#include <linux/module.h>
  21
  22#define DRV_NAME        "veth"
  23#define DRV_VERSION     "1.0"
  24
  25#define MIN_MTU 68              /* Min L3 MTU */
  26#define MAX_MTU 65535           /* Max L3 MTU (arbitrary) */
  27
  28struct veth_net_stats {
  29        u64                     rx_packets;
  30        u64                     rx_bytes;
  31        u64                     tx_packets;
  32        u64                     tx_bytes;
  33        u64                     rx_dropped;
  34        struct u64_stats_sync   syncp;
  35};
  36
  37struct veth_priv {
  38        struct net_device *peer;
  39        struct veth_net_stats __percpu *stats;
  40};
  41
  42/*
  43 * ethtool interface
  44 */
  45
  46static struct {
  47        const char string[ETH_GSTRING_LEN];
  48} ethtool_stats_keys[] = {
  49        { "peer_ifindex" },
  50};
  51
  52static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
  53{
  54        cmd->supported          = 0;
  55        cmd->advertising        = 0;
  56        ethtool_cmd_speed_set(cmd, SPEED_10000);
  57        cmd->duplex             = DUPLEX_FULL;
  58        cmd->port               = PORT_TP;
  59        cmd->phy_address        = 0;
  60        cmd->transceiver        = XCVR_INTERNAL;
  61        cmd->autoneg            = AUTONEG_DISABLE;
  62        cmd->maxtxpkt           = 0;
  63        cmd->maxrxpkt           = 0;
  64        return 0;
  65}
  66
  67static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
  68{
  69        strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
  70        strlcpy(info->version, DRV_VERSION, sizeof(info->version));
  71}
  72
  73static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
  74{
  75        switch(stringset) {
  76        case ETH_SS_STATS:
  77                memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
  78                break;
  79        }
  80}
  81
  82static int veth_get_sset_count(struct net_device *dev, int sset)
  83{
  84        switch (sset) {
  85        case ETH_SS_STATS:
  86                return ARRAY_SIZE(ethtool_stats_keys);
  87        default:
  88                return -EOPNOTSUPP;
  89        }
  90}
  91
  92static void veth_get_ethtool_stats(struct net_device *dev,
  93                struct ethtool_stats *stats, u64 *data)
  94{
  95        struct veth_priv *priv;
  96
  97        priv = netdev_priv(dev);
  98        data[0] = priv->peer->ifindex;
  99}
 100
 101static const struct ethtool_ops veth_ethtool_ops = {
 102        .get_settings           = veth_get_settings,
 103        .get_drvinfo            = veth_get_drvinfo,
 104        .get_link               = ethtool_op_get_link,
 105        .get_strings            = veth_get_strings,
 106        .get_sset_count         = veth_get_sset_count,
 107        .get_ethtool_stats      = veth_get_ethtool_stats,
 108};
 109
 110/*
 111 * xmit
 112 */
 113
 114static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 115{
 116        struct net_device *rcv = NULL;
 117        struct veth_priv *priv, *rcv_priv;
 118        struct veth_net_stats *stats, *rcv_stats;
 119        int length;
 120
 121        priv = netdev_priv(dev);
 122        rcv = priv->peer;
 123        rcv_priv = netdev_priv(rcv);
 124
 125        stats = this_cpu_ptr(priv->stats);
 126        rcv_stats = this_cpu_ptr(rcv_priv->stats);
 127
 128        /* don't change ip_summed == CHECKSUM_PARTIAL, as that
 129           will cause bad checksum on forwarded packets */
 130        if (skb->ip_summed == CHECKSUM_NONE &&
 131            rcv->features & NETIF_F_RXCSUM)
 132                skb->ip_summed = CHECKSUM_UNNECESSARY;
 133
 134        length = skb->len;
 135        if (dev_forward_skb(rcv, skb) != NET_RX_SUCCESS)
 136                goto rx_drop;
 137
 138        u64_stats_update_begin(&stats->syncp);
 139        stats->tx_bytes += length;
 140        stats->tx_packets++;
 141        u64_stats_update_end(&stats->syncp);
 142
 143        u64_stats_update_begin(&rcv_stats->syncp);
 144        rcv_stats->rx_bytes += length;
 145        rcv_stats->rx_packets++;
 146        u64_stats_update_end(&rcv_stats->syncp);
 147
 148        return NETDEV_TX_OK;
 149
 150rx_drop:
 151        u64_stats_update_begin(&rcv_stats->syncp);
 152        rcv_stats->rx_dropped++;
 153        u64_stats_update_end(&rcv_stats->syncp);
 154        return NETDEV_TX_OK;
 155}
 156
 157/*
 158 * general routines
 159 */
 160
 161static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev,
 162                                                  struct rtnl_link_stats64 *tot)
 163{
 164        struct veth_priv *priv = netdev_priv(dev);
 165        int cpu;
 166
 167        for_each_possible_cpu(cpu) {
 168                struct veth_net_stats *stats = per_cpu_ptr(priv->stats, cpu);
 169                u64 rx_packets, rx_bytes, rx_dropped;
 170                u64 tx_packets, tx_bytes;
 171                unsigned int start;
 172
 173                do {
 174                        start = u64_stats_fetch_begin_bh(&stats->syncp);
 175                        rx_packets = stats->rx_packets;
 176                        tx_packets = stats->tx_packets;
 177                        rx_bytes = stats->rx_bytes;
 178                        tx_bytes = stats->tx_bytes;
 179                        rx_dropped = stats->rx_dropped;
 180                } while (u64_stats_fetch_retry_bh(&stats->syncp, start));
 181                tot->rx_packets += rx_packets;
 182                tot->tx_packets += tx_packets;
 183                tot->rx_bytes   += rx_bytes;
 184                tot->tx_bytes   += tx_bytes;
 185                tot->rx_dropped += rx_dropped;
 186        }
 187
 188        return tot;
 189}
 190
 191static int veth_open(struct net_device *dev)
 192{
 193        struct veth_priv *priv;
 194
 195        priv = netdev_priv(dev);
 196        if (priv->peer == NULL)
 197                return -ENOTCONN;
 198
 199        if (priv->peer->flags & IFF_UP) {
 200                netif_carrier_on(dev);
 201                netif_carrier_on(priv->peer);
 202        }
 203        return 0;
 204}
 205
 206static int veth_close(struct net_device *dev)
 207{
 208        struct veth_priv *priv = netdev_priv(dev);
 209
 210        netif_carrier_off(dev);
 211        netif_carrier_off(priv->peer);
 212
 213        return 0;
 214}
 215
 216static int is_valid_veth_mtu(int new_mtu)
 217{
 218        return new_mtu >= MIN_MTU && new_mtu <= MAX_MTU;
 219}
 220
 221static int veth_change_mtu(struct net_device *dev, int new_mtu)
 222{
 223        if (!is_valid_veth_mtu(new_mtu))
 224                return -EINVAL;
 225        dev->mtu = new_mtu;
 226        return 0;
 227}
 228
 229static int veth_dev_init(struct net_device *dev)
 230{
 231        struct veth_net_stats __percpu *stats;
 232        struct veth_priv *priv;
 233
 234        stats = alloc_percpu(struct veth_net_stats);
 235        if (stats == NULL)
 236                return -ENOMEM;
 237
 238        priv = netdev_priv(dev);
 239        priv->stats = stats;
 240        return 0;
 241}
 242
 243static void veth_dev_free(struct net_device *dev)
 244{
 245        struct veth_priv *priv;
 246
 247        priv = netdev_priv(dev);
 248        free_percpu(priv->stats);
 249        free_netdev(dev);
 250}
 251
 252static const struct net_device_ops veth_netdev_ops = {
 253        .ndo_init            = veth_dev_init,
 254        .ndo_open            = veth_open,
 255        .ndo_stop            = veth_close,
 256        .ndo_start_xmit      = veth_xmit,
 257        .ndo_change_mtu      = veth_change_mtu,
 258        .ndo_get_stats64     = veth_get_stats64,
 259        .ndo_set_mac_address = eth_mac_addr,
 260};
 261
 262static void veth_setup(struct net_device *dev)
 263{
 264        ether_setup(dev);
 265
 266        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 267
 268        dev->netdev_ops = &veth_netdev_ops;
 269        dev->ethtool_ops = &veth_ethtool_ops;
 270        dev->features |= NETIF_F_LLTX;
 271        dev->destructor = veth_dev_free;
 272
 273        dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_RXCSUM;
 274}
 275
 276/*
 277 * netlink interface
 278 */
 279
 280static int veth_validate(struct nlattr *tb[], struct nlattr *data[])
 281{
 282        if (tb[IFLA_ADDRESS]) {
 283                if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
 284                        return -EINVAL;
 285                if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
 286                        return -EADDRNOTAVAIL;
 287        }
 288        if (tb[IFLA_MTU]) {
 289                if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU])))
 290                        return -EINVAL;
 291        }
 292        return 0;
 293}
 294
 295static struct rtnl_link_ops veth_link_ops;
 296
 297static int veth_newlink(struct net *src_net, struct net_device *dev,
 298                         struct nlattr *tb[], struct nlattr *data[])
 299{
 300        int err;
 301        struct net_device *peer;
 302        struct veth_priv *priv;
 303        char ifname[IFNAMSIZ];
 304        struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
 305        struct ifinfomsg *ifmp;
 306        struct net *net;
 307
 308        /*
 309         * create and register peer first
 310         */
 311        if (data != NULL && data[VETH_INFO_PEER] != NULL) {
 312                struct nlattr *nla_peer;
 313
 314                nla_peer = data[VETH_INFO_PEER];
 315                ifmp = nla_data(nla_peer);
 316                err = nla_parse(peer_tb, IFLA_MAX,
 317                                nla_data(nla_peer) + sizeof(struct ifinfomsg),
 318                                nla_len(nla_peer) - sizeof(struct ifinfomsg),
 319                                ifla_policy);
 320                if (err < 0)
 321                        return err;
 322
 323                err = veth_validate(peer_tb, NULL);
 324                if (err < 0)
 325                        return err;
 326
 327                tbp = peer_tb;
 328        } else {
 329                ifmp = NULL;
 330                tbp = tb;
 331        }
 332
 333        if (tbp[IFLA_IFNAME])
 334                nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
 335        else
 336                snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
 337
 338        net = rtnl_link_get_net(src_net, tbp);
 339        if (IS_ERR(net))
 340                return PTR_ERR(net);
 341
 342        peer = rtnl_create_link(src_net, net, ifname, &veth_link_ops, tbp);
 343        if (IS_ERR(peer)) {
 344                put_net(net);
 345                return PTR_ERR(peer);
 346        }
 347
 348        if (tbp[IFLA_ADDRESS] == NULL)
 349                eth_hw_addr_random(peer);
 350
 351        if (ifmp && (dev->ifindex != 0))
 352                peer->ifindex = ifmp->ifi_index;
 353
 354        err = register_netdevice(peer);
 355        put_net(net);
 356        net = NULL;
 357        if (err < 0)
 358                goto err_register_peer;
 359
 360        netif_carrier_off(peer);
 361
 362        err = rtnl_configure_link(peer, ifmp);
 363        if (err < 0)
 364                goto err_configure_peer;
 365
 366        /*
 367         * register dev last
 368         *
 369         * note, that since we've registered new device the dev's name
 370         * should be re-allocated
 371         */
 372
 373        if (tb[IFLA_ADDRESS] == NULL)
 374                eth_hw_addr_random(dev);
 375
 376        if (tb[IFLA_IFNAME])
 377                nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
 378        else
 379                snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
 380
 381        if (strchr(dev->name, '%')) {
 382                err = dev_alloc_name(dev, dev->name);
 383                if (err < 0)
 384                        goto err_alloc_name;
 385        }
 386
 387        err = register_netdevice(dev);
 388        if (err < 0)
 389                goto err_register_dev;
 390
 391        netif_carrier_off(dev);
 392
 393        /*
 394         * tie the deviced together
 395         */
 396
 397        priv = netdev_priv(dev);
 398        priv->peer = peer;
 399
 400        priv = netdev_priv(peer);
 401        priv->peer = dev;
 402        return 0;
 403
 404err_register_dev:
 405        /* nothing to do */
 406err_alloc_name:
 407err_configure_peer:
 408        unregister_netdevice(peer);
 409        return err;
 410
 411err_register_peer:
 412        free_netdev(peer);
 413        return err;
 414}
 415
 416static void veth_dellink(struct net_device *dev, struct list_head *head)
 417{
 418        struct veth_priv *priv;
 419        struct net_device *peer;
 420
 421        priv = netdev_priv(dev);
 422        peer = priv->peer;
 423
 424        unregister_netdevice_queue(dev, head);
 425        unregister_netdevice_queue(peer, head);
 426}
 427
 428static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = {
 429        [VETH_INFO_PEER]        = { .len = sizeof(struct ifinfomsg) },
 430};
 431
 432static struct rtnl_link_ops veth_link_ops = {
 433        .kind           = DRV_NAME,
 434        .priv_size      = sizeof(struct veth_priv),
 435        .setup          = veth_setup,
 436        .validate       = veth_validate,
 437        .newlink        = veth_newlink,
 438        .dellink        = veth_dellink,
 439        .policy         = veth_policy,
 440        .maxtype        = VETH_INFO_MAX,
 441};
 442
 443/*
 444 * init/fini
 445 */
 446
 447static __init int veth_init(void)
 448{
 449        return rtnl_link_register(&veth_link_ops);
 450}
 451
 452static __exit void veth_exit(void)
 453{
 454        rtnl_link_unregister(&veth_link_ops);
 455}
 456
 457module_init(veth_init);
 458module_exit(veth_exit);
 459
 460MODULE_DESCRIPTION("Virtual Ethernet Tunnel");
 461MODULE_LICENSE("GPL v2");
 462MODULE_ALIAS_RTNL_LINK(DRV_NAME);
 463