linux/drivers/net/veth.c
   1/*
   2 *  drivers/net/veth.c
   3 *
   4 *  Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc
   5 *
   6 * Author: Pavel Emelianov <xemul@openvz.org>
   7 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com>
   8 *
   9 */
  10
  11#include <linux/netdevice.h>
  12#include <linux/slab.h>
  13#include <linux/ethtool.h>
  14#include <linux/etherdevice.h>
  15#include <linux/u64_stats_sync.h>
  16
  17#include <net/rtnetlink.h>
  18#include <net/dst.h>
  19#include <net/xfrm.h>
  20#include <net/xdp.h>
  21#include <linux/veth.h>
  22#include <linux/module.h>
  23#include <linux/bpf.h>
  24#include <linux/filter.h>
  25#include <linux/ptr_ring.h>
  26#include <linux/bpf_trace.h>
  27#include <linux/net_tstamp.h>
  28
  29#define DRV_NAME        "veth"
  30#define DRV_VERSION     "1.0"
  31
  32#define VETH_XDP_FLAG           BIT(0)
  33#define VETH_RING_SIZE          256
  34#define VETH_XDP_HEADROOM       (XDP_PACKET_HEADROOM + NET_IP_ALIGN)
  35
  36/* Separating two types of XDP xmit */
  37#define VETH_XDP_TX             BIT(0)
  38#define VETH_XDP_REDIR          BIT(1)
  39
  40struct veth_rq_stats {
  41        u64                     xdp_packets;
  42        u64                     xdp_bytes;
  43        u64                     xdp_drops;
  44        struct u64_stats_sync   syncp;
  45};
  46
  47struct veth_rq {
  48        struct napi_struct      xdp_napi;
  49        struct net_device       *dev;
  50        struct bpf_prog __rcu   *xdp_prog;
  51        struct xdp_mem_info     xdp_mem;
  52        struct veth_rq_stats    stats;
  53        bool                    rx_notify_masked;
  54        struct ptr_ring         xdp_ring;
  55        struct xdp_rxq_info     xdp_rxq;
  56};
  57
  58struct veth_priv {
  59        struct net_device __rcu *peer;
  60        atomic64_t              dropped;
  61        struct bpf_prog         *_xdp_prog;
  62        struct veth_rq          *rq;
  63        unsigned int            requested_headroom;
  64};
  65
  66/*
  67 * ethtool interface
  68 */
  69
  70struct veth_q_stat_desc {
  71        char    desc[ETH_GSTRING_LEN];
  72        size_t  offset;
  73};
  74
  75#define VETH_RQ_STAT(m) offsetof(struct veth_rq_stats, m)
  76
  77static const struct veth_q_stat_desc veth_rq_stats_desc[] = {
  78        { "xdp_packets",        VETH_RQ_STAT(xdp_packets) },
  79        { "xdp_bytes",          VETH_RQ_STAT(xdp_bytes) },
  80        { "xdp_drops",          VETH_RQ_STAT(xdp_drops) },
  81};
  82
  83#define VETH_RQ_STATS_LEN       ARRAY_SIZE(veth_rq_stats_desc)
  84
  85static struct {
  86        const char string[ETH_GSTRING_LEN];
  87} ethtool_stats_keys[] = {
  88        { "peer_ifindex" },
  89};
  90
  91static int veth_get_link_ksettings(struct net_device *dev,
  92                                   struct ethtool_link_ksettings *cmd)
  93{
  94        cmd->base.speed         = SPEED_10000;
  95        cmd->base.duplex        = DUPLEX_FULL;
  96        cmd->base.port          = PORT_TP;
  97        cmd->base.autoneg       = AUTONEG_DISABLE;
  98        return 0;
  99}
 100
 101static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 102{
 103        strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
 104        strlcpy(info->version, DRV_VERSION, sizeof(info->version));
 105}
 106
 107static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 108{
 109        char *p = (char *)buf;
 110        int i, j;
 111
 112        switch(stringset) {
 113        case ETH_SS_STATS:
 114                memcpy(p, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
 115                p += sizeof(ethtool_stats_keys);
 116                for (i = 0; i < dev->real_num_rx_queues; i++) {
 117                        for (j = 0; j < VETH_RQ_STATS_LEN; j++) {
 118                                snprintf(p, ETH_GSTRING_LEN,
 119                                         "rx_queue_%u_%.11s",
 120                                         i, veth_rq_stats_desc[j].desc);
 121                                p += ETH_GSTRING_LEN;
 122                        }
 123                }
 124                break;
 125        }
 126}
 127
 128static int veth_get_sset_count(struct net_device *dev, int sset)
 129{
 130        switch (sset) {
 131        case ETH_SS_STATS:
 132                return ARRAY_SIZE(ethtool_stats_keys) +
 133                       VETH_RQ_STATS_LEN * dev->real_num_rx_queues;
 134        default:
 135                return -EOPNOTSUPP;
 136        }
 137}
 138
 139static void veth_get_ethtool_stats(struct net_device *dev,
 140                struct ethtool_stats *stats, u64 *data)
 141{
 142        struct veth_priv *priv = netdev_priv(dev);
 143        struct net_device *peer = rtnl_dereference(priv->peer);
 144        int i, j, idx;
 145
 146        data[0] = peer ? peer->ifindex : 0;
 147        idx = 1;
 148        for (i = 0; i < dev->real_num_rx_queues; i++) {
 149                const struct veth_rq_stats *rq_stats = &priv->rq[i].stats;
 150                const void *stats_base = (void *)rq_stats;
 151                unsigned int start;
 152                size_t offset;
 153
 154                do {
 155                        start = u64_stats_fetch_begin_irq(&rq_stats->syncp);
 156                        for (j = 0; j < VETH_RQ_STATS_LEN; j++) {
 157                                offset = veth_rq_stats_desc[j].offset;
 158                                data[idx + j] = *(u64 *)(stats_base + offset);
 159                        }
 160                } while (u64_stats_fetch_retry_irq(&rq_stats->syncp, start));
 161                idx += VETH_RQ_STATS_LEN;
 162        }
 163}
 164
 165static int veth_get_ts_info(struct net_device *dev,
 166                            struct ethtool_ts_info *info)
 167{
 168        info->so_timestamping =
 169                SOF_TIMESTAMPING_TX_SOFTWARE |
 170                SOF_TIMESTAMPING_RX_SOFTWARE |
 171                SOF_TIMESTAMPING_SOFTWARE;
 172        info->phc_index = -1;
 173
 174        return 0;
 175}
 176
 177static const struct ethtool_ops veth_ethtool_ops = {
 178        .get_drvinfo            = veth_get_drvinfo,
 179        .get_link               = ethtool_op_get_link,
 180        .get_strings            = veth_get_strings,
 181        .get_sset_count         = veth_get_sset_count,
 182        .get_ethtool_stats      = veth_get_ethtool_stats,
 183        .get_link_ksettings     = veth_get_link_ksettings,
 184        .get_ts_info            = veth_get_ts_info,
 185};
 186
 187/* general routines */
 188
 189static bool veth_is_xdp_frame(void *ptr)
 190{
 191        return (unsigned long)ptr & VETH_XDP_FLAG;
 192}
 193
 194static void *veth_ptr_to_xdp(void *ptr)
 195{
 196        return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG);
 197}
 198
 199static void *veth_xdp_to_ptr(void *ptr)
 200{
 201        return (void *)((unsigned long)ptr | VETH_XDP_FLAG);
 202}
 203
 204static void veth_ptr_free(void *ptr)
 205{
 206        if (veth_is_xdp_frame(ptr))
 207                xdp_return_frame(veth_ptr_to_xdp(ptr));
 208        else
 209                kfree_skb(ptr);
 210}
 211
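/* Schedule NAPI on the receiving side after new entries have been queued to
 * its xdp_ring; rx_notify_masked suppresses redundant reschedules while the
 * poll handler is already pending or running.
 */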
 212static void __veth_xdp_flush(struct veth_rq *rq)
 213{
 214        /* Write ptr_ring before reading rx_notify_masked */
 215        smp_mb();
 216        if (!rq->rx_notify_masked) {
 217                rq->rx_notify_masked = true;
 218                napi_schedule(&rq->xdp_napi);
 219        }
 220}
 221
 222static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb)
 223{
 224        if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) {
 225                dev_kfree_skb_any(skb);
 226                return NET_RX_DROP;
 227        }
 228
 229        return NET_RX_SUCCESS;
 230}
 231
 232static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb,
 233                            struct veth_rq *rq, bool xdp)
 234{
 235        return __dev_forward_skb(dev, skb) ?: xdp ?
 236                veth_xdp_rx(rq, skb) :
 237                netif_rx(skb);
 238}
 239
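/* ndo_start_xmit: a frame sent on one side of the pair is received by the
 * peer. If the peer has an XDP program attached, the skb is queued to the
 * per-queue xdp_ring and handled in NAPI context; otherwise it goes straight
 * to netif_rx(). TX counters are only bumped on the non-XDP path here; the
 * XDP path is accounted per rx queue in veth_xdp_rcv().
 */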
 240static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 241{
 242        struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
 243        struct veth_rq *rq = NULL;
 244        struct net_device *rcv;
 245        int length = skb->len;
 246        bool rcv_xdp = false;
 247        int rxq;
 248
 249        rcu_read_lock();
 250        rcv = rcu_dereference(priv->peer);
 251        if (unlikely(!rcv)) {
 252                kfree_skb(skb);
 253                goto drop;
 254        }
 255
 256        rcv_priv = netdev_priv(rcv);
 257        rxq = skb_get_queue_mapping(skb);
 258        if (rxq < rcv->real_num_rx_queues) {
 259                rq = &rcv_priv->rq[rxq];
 260                rcv_xdp = rcu_access_pointer(rq->xdp_prog);
 261                if (rcv_xdp)
 262                        skb_record_rx_queue(skb, rxq);
 263        }
 264
 265        skb_tx_timestamp(skb);
 266        if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) {
 267                if (!rcv_xdp) {
 268                        struct pcpu_lstats *stats = this_cpu_ptr(dev->lstats);
 269
 270                        u64_stats_update_begin(&stats->syncp);
 271                        stats->bytes += length;
 272                        stats->packets++;
 273                        u64_stats_update_end(&stats->syncp);
 274                }
 275        } else {
 276drop:
 277                atomic64_inc(&priv->dropped);
 278        }
 279
 280        if (rcv_xdp)
 281                __veth_xdp_flush(rq);
 282
 283        rcu_read_unlock();
 284
 285        return NETDEV_TX_OK;
 286}
 287
 288static u64 veth_stats_tx(struct pcpu_lstats *result, struct net_device *dev)
 289{
 290        struct veth_priv *priv = netdev_priv(dev);
 291        int cpu;
 292
 293        result->packets = 0;
 294        result->bytes = 0;
 295        for_each_possible_cpu(cpu) {
 296                struct pcpu_lstats *stats = per_cpu_ptr(dev->lstats, cpu);
 297                u64 packets, bytes;
 298                unsigned int start;
 299
 300                do {
 301                        start = u64_stats_fetch_begin_irq(&stats->syncp);
 302                        packets = stats->packets;
 303                        bytes = stats->bytes;
 304                } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
 305                result->packets += packets;
 306                result->bytes += bytes;
 307        }
 308        return atomic64_read(&priv->dropped);
 309}
 310
 311static void veth_stats_rx(struct veth_rq_stats *result, struct net_device *dev)
 312{
 313        struct veth_priv *priv = netdev_priv(dev);
 314        int i;
 315
 316        result->xdp_packets = 0;
 317        result->xdp_bytes = 0;
 318        result->xdp_drops = 0;
 319        for (i = 0; i < dev->num_rx_queues; i++) {
 320                struct veth_rq_stats *stats = &priv->rq[i].stats;
 321                u64 packets, bytes, drops;
 322                unsigned int start;
 323
 324                do {
 325                        start = u64_stats_fetch_begin_irq(&stats->syncp);
 326                        packets = stats->xdp_packets;
 327                        bytes = stats->xdp_bytes;
 328                        drops = stats->xdp_drops;
 329                } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
 330                result->xdp_packets += packets;
 331                result->xdp_bytes += bytes;
 332                result->xdp_drops += drops;
 333        }
 334}
 335
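/* Fill rtnl_link_stats64 for one side of the pair: local lstats supply the
 * TX counters, the local per-queue XDP stats supply the RX counters, and the
 * peer's counters are folded in with the directions swapped.
 */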
 336static void veth_get_stats64(struct net_device *dev,
 337                             struct rtnl_link_stats64 *tot)
 338{
 339        struct veth_priv *priv = netdev_priv(dev);
 340        struct net_device *peer;
 341        struct veth_rq_stats rx;
 342        struct pcpu_lstats tx;
 343
 344        tot->tx_dropped = veth_stats_tx(&tx, dev);
 345        tot->tx_bytes = tx.bytes;
 346        tot->tx_packets = tx.packets;
 347
 348        veth_stats_rx(&rx, dev);
 349        tot->rx_dropped = rx.xdp_drops;
 350        tot->rx_bytes = rx.xdp_bytes;
 351        tot->rx_packets = rx.xdp_packets;
 352
 353        rcu_read_lock();
 354        peer = rcu_dereference(priv->peer);
 355        if (peer) {
 356                tot->rx_dropped += veth_stats_tx(&tx, peer);
 357                tot->rx_bytes += tx.bytes;
 358                tot->rx_packets += tx.packets;
 359
 360                veth_stats_rx(&rx, peer);
 361                tot->tx_bytes += rx.xdp_bytes;
 362                tot->tx_packets += rx.xdp_packets;
 363        }
 364        rcu_read_unlock();
 365}
 366
 367/* fake multicast ability */
 368static void veth_set_multicast_list(struct net_device *dev)
 369{
 370}
 371
 372static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
 373                                      int buflen)
 374{
 375        struct sk_buff *skb;
 376
 377        if (!buflen) {
 378                buflen = SKB_DATA_ALIGN(headroom + len) +
 379                         SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 380        }
 381        skb = build_skb(head, buflen);
 382        if (!skb)
 383                return NULL;
 384
 385        skb_reserve(skb, headroom);
 386        skb_put(skb, len);
 387
 388        return skb;
 389}
 390
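/* Pick a peer rx queue for XDP transmission; spreading by CPU id keeps
 * concurrent producers off the same ptr_ring where possible.
 */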
 391static int veth_select_rxq(struct net_device *dev)
 392{
 393        return smp_processor_id() % dev->real_num_rx_queues;
 394}
 395
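/* ndo_xdp_xmit: push already-built xdp_frames straight into the peer's
 * xdp_ring. Frames that exceed the peer's MTU or that do not fit in the ring
 * are dropped and returned to their memory model.
 */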
 396static int veth_xdp_xmit(struct net_device *dev, int n,
 397                         struct xdp_frame **frames, u32 flags)
 398{
 399        struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
 400        struct net_device *rcv;
 401        int i, ret, drops = n;
 402        unsigned int max_len;
 403        struct veth_rq *rq;
 404
 405        if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
 406                ret = -EINVAL;
 407                goto drop;
 408        }
 409
 410        rcv = rcu_dereference(priv->peer);
 411        if (unlikely(!rcv)) {
 412                ret = -ENXIO;
 413                goto drop;
 414        }
 415
 416        rcv_priv = netdev_priv(rcv);
 417        rq = &rcv_priv->rq[veth_select_rxq(rcv)];
 418        /* Non-NULL xdp_prog ensures that xdp_ring is initialized on receive
 419         * side. This means an XDP program is loaded on the peer and the peer
 420         * device is up.
 421         */
 422        if (!rcu_access_pointer(rq->xdp_prog)) {
 423                ret = -ENXIO;
 424                goto drop;
 425        }
 426
 427        drops = 0;
 428        max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN;
 429
 430        spin_lock(&rq->xdp_ring.producer_lock);
 431        for (i = 0; i < n; i++) {
 432                struct xdp_frame *frame = frames[i];
 433                void *ptr = veth_xdp_to_ptr(frame);
 434
 435                if (unlikely(frame->len > max_len ||
 436                             __ptr_ring_produce(&rq->xdp_ring, ptr))) {
 437                        xdp_return_frame_rx_napi(frame);
 438                        drops++;
 439                }
 440        }
 441        spin_unlock(&rq->xdp_ring.producer_lock);
 442
 443        if (flags & XDP_XMIT_FLUSH)
 444                __veth_xdp_flush(rq);
 445
 446        if (likely(!drops))
 447                return n;
 448
 449        ret = n - drops;
 450drop:
 451        atomic64_add(drops, &priv->dropped);
 452
 453        return ret;
 454}
 455
 456static void veth_xdp_flush(struct net_device *dev)
 457{
 458        struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
 459        struct net_device *rcv;
 460        struct veth_rq *rq;
 461
 462        rcu_read_lock();
 463        rcv = rcu_dereference(priv->peer);
 464        if (unlikely(!rcv))
 465                goto out;
 466
 467        rcv_priv = netdev_priv(rcv);
 468        rq = &rcv_priv->rq[veth_select_rxq(rcv)];
  469        /* xdp_ring is only initialized when the receive side has an XDP program */
 470        if (unlikely(!rcu_access_pointer(rq->xdp_prog)))
 471                goto out;
 472
 473        __veth_xdp_flush(rq);
 474out:
 475        rcu_read_unlock();
 476}
 477
 478static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
 479{
 480        struct xdp_frame *frame = convert_to_xdp_frame(xdp);
 481
 482        if (unlikely(!frame))
 483                return -EOVERFLOW;
 484
 485        return veth_xdp_xmit(dev, 1, &frame, 0);
 486}
 487
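/* Handle one xdp_frame consumed from the ring: run the local XDP program on
 * it and either build an skb for the stack (XDP_PASS), bounce it back to the
 * peer (XDP_TX), redirect it to another device (XDP_REDIRECT) or drop it.
 */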
 488static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
 489                                        struct xdp_frame *frame,
 490                                        unsigned int *xdp_xmit)
 491{
 492        void *hard_start = frame->data - frame->headroom;
 493        void *head = hard_start - sizeof(struct xdp_frame);
 494        int len = frame->len, delta = 0;
 495        struct xdp_frame orig_frame;
 496        struct bpf_prog *xdp_prog;
 497        unsigned int headroom;
 498        struct sk_buff *skb;
 499
 500        rcu_read_lock();
 501        xdp_prog = rcu_dereference(rq->xdp_prog);
 502        if (likely(xdp_prog)) {
 503                struct xdp_buff xdp;
 504                u32 act;
 505
 506                xdp.data_hard_start = hard_start;
 507                xdp.data = frame->data;
 508                xdp.data_end = frame->data + frame->len;
 509                xdp.data_meta = frame->data - frame->metasize;
 510                xdp.rxq = &rq->xdp_rxq;
 511
 512                act = bpf_prog_run_xdp(xdp_prog, &xdp);
 513
 514                switch (act) {
 515                case XDP_PASS:
 516                        delta = frame->data - xdp.data;
 517                        len = xdp.data_end - xdp.data;
 518                        break;
 519                case XDP_TX:
 520                        orig_frame = *frame;
 521                        xdp.data_hard_start = head;
 522                        xdp.rxq->mem = frame->mem;
 523                        if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) {
 524                                trace_xdp_exception(rq->dev, xdp_prog, act);
 525                                frame = &orig_frame;
 526                                goto err_xdp;
 527                        }
 528                        *xdp_xmit |= VETH_XDP_TX;
 529                        rcu_read_unlock();
 530                        goto xdp_xmit;
 531                case XDP_REDIRECT:
 532                        orig_frame = *frame;
 533                        xdp.data_hard_start = head;
 534                        xdp.rxq->mem = frame->mem;
 535                        if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
 536                                frame = &orig_frame;
 537                                goto err_xdp;
 538                        }
 539                        *xdp_xmit |= VETH_XDP_REDIR;
 540                        rcu_read_unlock();
 541                        goto xdp_xmit;
 542                default:
 543                        bpf_warn_invalid_xdp_action(act);
 544                        /* fall through */
 545                case XDP_ABORTED:
 546                        trace_xdp_exception(rq->dev, xdp_prog, act);
 547                        /* fall through */
 548                case XDP_DROP:
 549                        goto err_xdp;
 550                }
 551        }
 552        rcu_read_unlock();
 553
 554        headroom = sizeof(struct xdp_frame) + frame->headroom - delta;
 555        skb = veth_build_skb(head, headroom, len, 0);
 556        if (!skb) {
 557                xdp_return_frame(frame);
 558                goto err;
 559        }
 560
 561        xdp_scrub_frame(frame);
 562        skb->protocol = eth_type_trans(skb, rq->dev);
 563err:
 564        return skb;
 565err_xdp:
 566        rcu_read_unlock();
 567        xdp_return_frame(frame);
 568xdp_xmit:
 569        return NULL;
 570}
 571
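/* Handle an skb consumed from the ring: if it is shared, nonlinear or lacks
 * XDP_PACKET_HEADROOM, copy it into a freshly allocated page first, then run
 * the XDP program and adjust the skb geometry according to the verdict.
 */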
 572static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb,
 573                                        unsigned int *xdp_xmit)
 574{
 575        u32 pktlen, headroom, act, metalen;
 576        void *orig_data, *orig_data_end;
 577        struct bpf_prog *xdp_prog;
 578        int mac_len, delta, off;
 579        struct xdp_buff xdp;
 580
 581        skb_orphan(skb);
 582
 583        rcu_read_lock();
 584        xdp_prog = rcu_dereference(rq->xdp_prog);
 585        if (unlikely(!xdp_prog)) {
 586                rcu_read_unlock();
 587                goto out;
 588        }
 589
 590        mac_len = skb->data - skb_mac_header(skb);
 591        pktlen = skb->len + mac_len;
 592        headroom = skb_headroom(skb) - mac_len;
 593
 594        if (skb_shared(skb) || skb_head_is_locked(skb) ||
 595            skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) {
 596                struct sk_buff *nskb;
 597                int size, head_off;
 598                void *head, *start;
 599                struct page *page;
 600
 601                size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) +
 602                       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 603                if (size > PAGE_SIZE)
 604                        goto drop;
 605
 606                page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 607                if (!page)
 608                        goto drop;
 609
 610                head = page_address(page);
 611                start = head + VETH_XDP_HEADROOM;
 612                if (skb_copy_bits(skb, -mac_len, start, pktlen)) {
 613                        page_frag_free(head);
 614                        goto drop;
 615                }
 616
 617                nskb = veth_build_skb(head,
 618                                      VETH_XDP_HEADROOM + mac_len, skb->len,
 619                                      PAGE_SIZE);
 620                if (!nskb) {
 621                        page_frag_free(head);
 622                        goto drop;
 623                }
 624
 625                skb_copy_header(nskb, skb);
 626                head_off = skb_headroom(nskb) - skb_headroom(skb);
 627                skb_headers_offset_update(nskb, head_off);
 628                consume_skb(skb);
 629                skb = nskb;
 630        }
 631
 632        xdp.data_hard_start = skb->head;
 633        xdp.data = skb_mac_header(skb);
 634        xdp.data_end = xdp.data + pktlen;
 635        xdp.data_meta = xdp.data;
 636        xdp.rxq = &rq->xdp_rxq;
 637        orig_data = xdp.data;
 638        orig_data_end = xdp.data_end;
 639
 640        act = bpf_prog_run_xdp(xdp_prog, &xdp);
 641
 642        switch (act) {
 643        case XDP_PASS:
 644                break;
 645        case XDP_TX:
 646                get_page(virt_to_page(xdp.data));
 647                consume_skb(skb);
 648                xdp.rxq->mem = rq->xdp_mem;
 649                if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) {
 650                        trace_xdp_exception(rq->dev, xdp_prog, act);
 651                        goto err_xdp;
 652                }
 653                *xdp_xmit |= VETH_XDP_TX;
 654                rcu_read_unlock();
 655                goto xdp_xmit;
 656        case XDP_REDIRECT:
 657                get_page(virt_to_page(xdp.data));
 658                consume_skb(skb);
 659                xdp.rxq->mem = rq->xdp_mem;
 660                if (xdp_do_redirect(rq->dev, &xdp, xdp_prog))
 661                        goto err_xdp;
 662                *xdp_xmit |= VETH_XDP_REDIR;
 663                rcu_read_unlock();
 664                goto xdp_xmit;
 665        default:
 666                bpf_warn_invalid_xdp_action(act);
 667                /* fall through */
 668        case XDP_ABORTED:
 669                trace_xdp_exception(rq->dev, xdp_prog, act);
 670                /* fall through */
 671        case XDP_DROP:
 672                goto drop;
 673        }
 674        rcu_read_unlock();
 675
 676        delta = orig_data - xdp.data;
 677        off = mac_len + delta;
 678        if (off > 0)
 679                __skb_push(skb, off);
 680        else if (off < 0)
 681                __skb_pull(skb, -off);
 682        skb->mac_header -= delta;
 683        off = xdp.data_end - orig_data_end;
 684        if (off != 0)
 685                __skb_put(skb, off);
 686        skb->protocol = eth_type_trans(skb, rq->dev);
 687
 688        metalen = xdp.data - xdp.data_meta;
 689        if (metalen)
 690                skb_metadata_set(skb, metalen);
 691out:
 692        return skb;
 693drop:
 694        rcu_read_unlock();
 695        kfree_skb(skb);
 696        return NULL;
 697err_xdp:
 698        rcu_read_unlock();
 699        page_frag_free(xdp.data);
 700xdp_xmit:
 701        return NULL;
 702}
 703
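/* Consume up to @budget entries from the xdp_ring; each entry is either an
 * xdp_frame (tagged with VETH_XDP_FLAG) or an skb, and survivors are handed
 * to GRO. Per-queue packet/byte/drop counters are updated at the end.
 */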
 704static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit)
 705{
 706        int i, done = 0, drops = 0, bytes = 0;
 707
 708        for (i = 0; i < budget; i++) {
 709                void *ptr = __ptr_ring_consume(&rq->xdp_ring);
 710                unsigned int xdp_xmit_one = 0;
 711                struct sk_buff *skb;
 712
 713                if (!ptr)
 714                        break;
 715
 716                if (veth_is_xdp_frame(ptr)) {
 717                        struct xdp_frame *frame = veth_ptr_to_xdp(ptr);
 718
 719                        bytes += frame->len;
 720                        skb = veth_xdp_rcv_one(rq, frame, &xdp_xmit_one);
 721                } else {
 722                        skb = ptr;
 723                        bytes += skb->len;
 724                        skb = veth_xdp_rcv_skb(rq, skb, &xdp_xmit_one);
 725                }
 726                *xdp_xmit |= xdp_xmit_one;
 727
 728                if (skb)
 729                        napi_gro_receive(&rq->xdp_napi, skb);
 730                else if (!xdp_xmit_one)
 731                        drops++;
 732
 733                done++;
 734        }
 735
 736        u64_stats_update_begin(&rq->stats.syncp);
 737        rq->stats.xdp_packets += done;
 738        rq->stats.xdp_bytes += bytes;
 739        rq->stats.xdp_drops += drops;
 740        u64_stats_update_end(&rq->stats.syncp);
 741
 742        return done;
 743}
 744
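/* NAPI poll handler. If the budget was not exhausted, re-check the ring
 * after clearing rx_notify_masked to avoid losing a wakeup that raced with
 * napi_complete_done(); XDP_TX and XDP_REDIRECT work is flushed at the end.
 */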
 745static int veth_poll(struct napi_struct *napi, int budget)
 746{
 747        struct veth_rq *rq =
 748                container_of(napi, struct veth_rq, xdp_napi);
 749        unsigned int xdp_xmit = 0;
 750        int done;
 751
 752        xdp_set_return_frame_no_direct();
 753        done = veth_xdp_rcv(rq, budget, &xdp_xmit);
 754
 755        if (done < budget && napi_complete_done(napi, done)) {
 756                /* Write rx_notify_masked before reading ptr_ring */
 757                smp_store_mb(rq->rx_notify_masked, false);
 758                if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) {
 759                        rq->rx_notify_masked = true;
 760                        napi_schedule(&rq->xdp_napi);
 761                }
 762        }
 763
 764        if (xdp_xmit & VETH_XDP_TX)
 765                veth_xdp_flush(rq->dev);
 766        if (xdp_xmit & VETH_XDP_REDIR)
 767                xdp_do_flush_map();
 768        xdp_clear_return_frame_no_direct();
 769
 770        return done;
 771}
 772
 773static int veth_napi_add(struct net_device *dev)
 774{
 775        struct veth_priv *priv = netdev_priv(dev);
 776        int err, i;
 777
 778        for (i = 0; i < dev->real_num_rx_queues; i++) {
 779                struct veth_rq *rq = &priv->rq[i];
 780
 781                err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL);
 782                if (err)
 783                        goto err_xdp_ring;
 784        }
 785
 786        for (i = 0; i < dev->real_num_rx_queues; i++) {
 787                struct veth_rq *rq = &priv->rq[i];
 788
 789                netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
 790                napi_enable(&rq->xdp_napi);
 791        }
 792
 793        return 0;
 794err_xdp_ring:
 795        for (i--; i >= 0; i--)
 796                ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
 797
 798        return err;
 799}
 800
 801static void veth_napi_del(struct net_device *dev)
 802{
 803        struct veth_priv *priv = netdev_priv(dev);
 804        int i;
 805
 806        for (i = 0; i < dev->real_num_rx_queues; i++) {
 807                struct veth_rq *rq = &priv->rq[i];
 808
 809                napi_disable(&rq->xdp_napi);
 810                napi_hash_del(&rq->xdp_napi);
 811        }
 812        synchronize_net();
 813
 814        for (i = 0; i < dev->real_num_rx_queues; i++) {
 815                struct veth_rq *rq = &priv->rq[i];
 816
 817                netif_napi_del(&rq->xdp_napi);
 818                rq->rx_notify_masked = false;
 819                ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
 820        }
 821}
 822
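/* Set up the XDP datapath: register xdp_rxq_info and the shared-page memory
 * model for every rx queue, start NAPI, then publish the program to each
 * queue. Called when a program is attached while the device is up, or when
 * the device is brought up with a program already attached.
 */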
 823static int veth_enable_xdp(struct net_device *dev)
 824{
 825        struct veth_priv *priv = netdev_priv(dev);
 826        int err, i;
 827
 828        if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) {
 829                for (i = 0; i < dev->real_num_rx_queues; i++) {
 830                        struct veth_rq *rq = &priv->rq[i];
 831
 832                        err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i);
 833                        if (err < 0)
 834                                goto err_rxq_reg;
 835
 836                        err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
 837                                                         MEM_TYPE_PAGE_SHARED,
 838                                                         NULL);
 839                        if (err < 0)
 840                                goto err_reg_mem;
 841
 842                        /* Save original mem info as it can be overwritten */
 843                        rq->xdp_mem = rq->xdp_rxq.mem;
 844                }
 845
 846                err = veth_napi_add(dev);
 847                if (err)
 848                        goto err_rxq_reg;
 849        }
 850
 851        for (i = 0; i < dev->real_num_rx_queues; i++)
 852                rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog);
 853
 854        return 0;
 855err_reg_mem:
 856        xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
 857err_rxq_reg:
 858        for (i--; i >= 0; i--)
 859                xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
 860
 861        return err;
 862}
 863
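/* Tear down the XDP datapath: unpublish the program from every queue, stop
 * NAPI and drain the rings, then restore the original memory model and
 * unregister the rxq info.
 */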
 864static void veth_disable_xdp(struct net_device *dev)
 865{
 866        struct veth_priv *priv = netdev_priv(dev);
 867        int i;
 868
 869        for (i = 0; i < dev->real_num_rx_queues; i++)
 870                rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);
 871        veth_napi_del(dev);
 872        for (i = 0; i < dev->real_num_rx_queues; i++) {
 873                struct veth_rq *rq = &priv->rq[i];
 874
 875                rq->xdp_rxq.mem = rq->xdp_mem;
 876                xdp_rxq_info_unreg(&rq->xdp_rxq);
 877        }
 878}
 879
 880static int veth_open(struct net_device *dev)
 881{
 882        struct veth_priv *priv = netdev_priv(dev);
 883        struct net_device *peer = rtnl_dereference(priv->peer);
 884        int err;
 885
 886        if (!peer)
 887                return -ENOTCONN;
 888
 889        if (priv->_xdp_prog) {
 890                err = veth_enable_xdp(dev);
 891                if (err)
 892                        return err;
 893        }
 894
 895        if (peer->flags & IFF_UP) {
 896                netif_carrier_on(dev);
 897                netif_carrier_on(peer);
 898        }
 899
 900        return 0;
 901}
 902
 903static int veth_close(struct net_device *dev)
 904{
 905        struct veth_priv *priv = netdev_priv(dev);
 906        struct net_device *peer = rtnl_dereference(priv->peer);
 907
 908        netif_carrier_off(dev);
 909        if (peer)
 910                netif_carrier_off(peer);
 911
 912        if (priv->_xdp_prog)
 913                veth_disable_xdp(dev);
 914
 915        return 0;
 916}
 917
 918static int is_valid_veth_mtu(int mtu)
 919{
 920        return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU;
 921}
 922
 923static int veth_alloc_queues(struct net_device *dev)
 924{
 925        struct veth_priv *priv = netdev_priv(dev);
 926        int i;
 927
 928        priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL);
 929        if (!priv->rq)
 930                return -ENOMEM;
 931
 932        for (i = 0; i < dev->num_rx_queues; i++) {
 933                priv->rq[i].dev = dev;
 934                u64_stats_init(&priv->rq[i].stats.syncp);
 935        }
 936
 937        return 0;
 938}
 939
 940static void veth_free_queues(struct net_device *dev)
 941{
 942        struct veth_priv *priv = netdev_priv(dev);
 943
 944        kfree(priv->rq);
 945}
 946
 947static int veth_dev_init(struct net_device *dev)
 948{
 949        int err;
 950
 951        dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
 952        if (!dev->lstats)
 953                return -ENOMEM;
 954
 955        err = veth_alloc_queues(dev);
 956        if (err) {
 957                free_percpu(dev->lstats);
 958                return err;
 959        }
 960
 961        return 0;
 962}
 963
 964static void veth_dev_free(struct net_device *dev)
 965{
 966        veth_free_queues(dev);
 967        free_percpu(dev->lstats);
 968}
 969
 970#ifdef CONFIG_NET_POLL_CONTROLLER
 971static void veth_poll_controller(struct net_device *dev)
 972{
  973        /* veth only receives frames when its peer sends one.
  974         * Since it has nothing to do with disabling irqs, we are guaranteed
  975         * never to have pending data when we poll for it, so
  976         * there is nothing to do here.
  977         *
  978         * We need this though so netpoll recognizes us as an interface that
  979         * supports polling, which enables bridge devices in virt setups to
  980         * still use netconsole.
  981         */
 982}
 983#endif  /* CONFIG_NET_POLL_CONTROLLER */
 984
 985static int veth_get_iflink(const struct net_device *dev)
 986{
 987        struct veth_priv *priv = netdev_priv(dev);
 988        struct net_device *peer;
 989        int iflink;
 990
 991        rcu_read_lock();
 992        peer = rcu_dereference(priv->peer);
 993        iflink = peer ? peer->ifindex : 0;
 994        rcu_read_unlock();
 995
 996        return iflink;
 997}
 998
 999static netdev_features_t veth_fix_features(struct net_device *dev,
1000                                           netdev_features_t features)
1001{
1002        struct veth_priv *priv = netdev_priv(dev);
1003        struct net_device *peer;
1004
1005        peer = rtnl_dereference(priv->peer);
1006        if (peer) {
1007                struct veth_priv *peer_priv = netdev_priv(peer);
1008
1009                if (peer_priv->_xdp_prog)
1010                        features &= ~NETIF_F_GSO_SOFTWARE;
1011        }
1012
1013        return features;
1014}
1015
1016static void veth_set_rx_headroom(struct net_device *dev, int new_hr)
1017{
1018        struct veth_priv *peer_priv, *priv = netdev_priv(dev);
1019        struct net_device *peer;
1020
1021        if (new_hr < 0)
1022                new_hr = 0;
1023
1024        rcu_read_lock();
1025        peer = rcu_dereference(priv->peer);
1026        if (unlikely(!peer))
1027                goto out;
1028
1029        peer_priv = netdev_priv(peer);
1030        priv->requested_headroom = new_hr;
1031        new_hr = max(priv->requested_headroom, peer_priv->requested_headroom);
1032        dev->needed_headroom = new_hr;
1033        peer->needed_headroom = new_hr;
1034
1035out:
1036        rcu_read_unlock();
1037}
1038
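/* XDP_SETUP_PROG handler. Attaching a program requires a connected peer, a
 * peer MTU small enough that headroom plus skb_shared_info still fit in one
 * page, and at least as many local rx queues as the peer has tx queues; GSO
 * is turned off on the peer while a program is attached.
 */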
1039static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
1040                        struct netlink_ext_ack *extack)
1041{
1042        struct veth_priv *priv = netdev_priv(dev);
1043        struct bpf_prog *old_prog;
1044        struct net_device *peer;
1045        unsigned int max_mtu;
1046        int err;
1047
1048        old_prog = priv->_xdp_prog;
1049        priv->_xdp_prog = prog;
1050        peer = rtnl_dereference(priv->peer);
1051
1052        if (prog) {
1053                if (!peer) {
1054                        NL_SET_ERR_MSG_MOD(extack, "Cannot set XDP when peer is detached");
1055                        err = -ENOTCONN;
1056                        goto err;
1057                }
1058
1059                max_mtu = PAGE_SIZE - VETH_XDP_HEADROOM -
1060                          peer->hard_header_len -
1061                          SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1062                if (peer->mtu > max_mtu) {
1063                        NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP");
1064                        err = -ERANGE;
1065                        goto err;
1066                }
1067
1068                if (dev->real_num_rx_queues < peer->real_num_tx_queues) {
1069                        NL_SET_ERR_MSG_MOD(extack, "XDP expects number of rx queues not less than peer tx queues");
1070                        err = -ENOSPC;
1071                        goto err;
1072                }
1073
1074                if (dev->flags & IFF_UP) {
1075                        err = veth_enable_xdp(dev);
1076                        if (err) {
1077                                NL_SET_ERR_MSG_MOD(extack, "Setup for XDP failed");
1078                                goto err;
1079                        }
1080                }
1081
1082                if (!old_prog) {
1083                        peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
1084                        peer->max_mtu = max_mtu;
1085                }
1086        }
1087
1088        if (old_prog) {
1089                if (!prog) {
1090                        if (dev->flags & IFF_UP)
1091                                veth_disable_xdp(dev);
1092
1093                        if (peer) {
1094                                peer->hw_features |= NETIF_F_GSO_SOFTWARE;
1095                                peer->max_mtu = ETH_MAX_MTU;
1096                        }
1097                }
1098                bpf_prog_put(old_prog);
1099        }
1100
1101        if ((!!old_prog ^ !!prog) && peer)
1102                netdev_update_features(peer);
1103
1104        return 0;
1105err:
1106        priv->_xdp_prog = old_prog;
1107
1108        return err;
1109}
1110
1111static u32 veth_xdp_query(struct net_device *dev)
1112{
1113        struct veth_priv *priv = netdev_priv(dev);
1114        const struct bpf_prog *xdp_prog;
1115
1116        xdp_prog = priv->_xdp_prog;
1117        if (xdp_prog)
1118                return xdp_prog->aux->id;
1119
1120        return 0;
1121}
1122
1123static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1124{
1125        switch (xdp->command) {
1126        case XDP_SETUP_PROG:
1127                return veth_xdp_set(dev, xdp->prog, xdp->extack);
1128        case XDP_QUERY_PROG:
1129                xdp->prog_id = veth_xdp_query(dev);
1130                return 0;
1131        default:
1132                return -EINVAL;
1133        }
1134}
1135
1136static const struct net_device_ops veth_netdev_ops = {
1137        .ndo_init            = veth_dev_init,
1138        .ndo_open            = veth_open,
1139        .ndo_stop            = veth_close,
1140        .ndo_start_xmit      = veth_xmit,
1141        .ndo_get_stats64     = veth_get_stats64,
1142        .ndo_set_rx_mode     = veth_set_multicast_list,
1143        .ndo_set_mac_address = eth_mac_addr,
1144#ifdef CONFIG_NET_POLL_CONTROLLER
1145        .ndo_poll_controller    = veth_poll_controller,
1146#endif
1147        .ndo_get_iflink         = veth_get_iflink,
1148        .ndo_fix_features       = veth_fix_features,
1149        .ndo_features_check     = passthru_features_check,
1150        .ndo_set_rx_headroom    = veth_set_rx_headroom,
1151        .ndo_bpf                = veth_xdp,
1152        .ndo_xdp_xmit           = veth_xdp_xmit,
1153};
1154
1155#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
1156                       NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \
1157                       NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \
1158                       NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \
1159                       NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX )
1160
1161static void veth_setup(struct net_device *dev)
1162{
1163        ether_setup(dev);
1164
1165        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1166        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1167        dev->priv_flags |= IFF_NO_QUEUE;
1168        dev->priv_flags |= IFF_PHONY_HEADROOM;
1169
1170        dev->netdev_ops = &veth_netdev_ops;
1171        dev->ethtool_ops = &veth_ethtool_ops;
1172        dev->features |= NETIF_F_LLTX;
1173        dev->features |= VETH_FEATURES;
1174        dev->vlan_features = dev->features &
1175                             ~(NETIF_F_HW_VLAN_CTAG_TX |
1176                               NETIF_F_HW_VLAN_STAG_TX |
1177                               NETIF_F_HW_VLAN_CTAG_RX |
1178                               NETIF_F_HW_VLAN_STAG_RX);
1179        dev->needs_free_netdev = true;
1180        dev->priv_destructor = veth_dev_free;
1181        dev->max_mtu = ETH_MAX_MTU;
1182
1183        dev->hw_features = VETH_FEATURES;
1184        dev->hw_enc_features = VETH_FEATURES;
1185        dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
1186}
1187
1188/*
1189 * netlink interface
1190 */
1191
1192static int veth_validate(struct nlattr *tb[], struct nlattr *data[],
1193                         struct netlink_ext_ack *extack)
1194{
1195        if (tb[IFLA_ADDRESS]) {
1196                if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1197                        return -EINVAL;
1198                if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1199                        return -EADDRNOTAVAIL;
1200        }
1201        if (tb[IFLA_MTU]) {
1202                if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU])))
1203                        return -EINVAL;
1204        }
1205        return 0;
1206}
1207
1208static struct rtnl_link_ops veth_link_ops;
1209
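/* Create both ends of the pair: the peer is created and registered first,
 * honouring an optional VETH_INFO_PEER attribute for its netns, name and
 * address, then the requesting device is registered, and finally the two
 * are tied together through their priv->peer pointers.
 */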
1210static int veth_newlink(struct net *src_net, struct net_device *dev,
1211                        struct nlattr *tb[], struct nlattr *data[],
1212                        struct netlink_ext_ack *extack)
1213{
1214        int err;
1215        struct net_device *peer;
1216        struct veth_priv *priv;
1217        char ifname[IFNAMSIZ];
1218        struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
1219        unsigned char name_assign_type;
1220        struct ifinfomsg *ifmp;
1221        struct net *net;
1222
1223        /*
1224         * create and register peer first
1225         */
1226        if (data != NULL && data[VETH_INFO_PEER] != NULL) {
1227                struct nlattr *nla_peer;
1228
1229                nla_peer = data[VETH_INFO_PEER];
1230                ifmp = nla_data(nla_peer);
1231                err = rtnl_nla_parse_ifla(peer_tb,
1232                                          nla_data(nla_peer) + sizeof(struct ifinfomsg),
1233                                          nla_len(nla_peer) - sizeof(struct ifinfomsg),
1234                                          NULL);
1235                if (err < 0)
1236                        return err;
1237
1238                err = veth_validate(peer_tb, NULL, extack);
1239                if (err < 0)
1240                        return err;
1241
1242                tbp = peer_tb;
1243        } else {
1244                ifmp = NULL;
1245                tbp = tb;
1246        }
1247
1248        if (ifmp && tbp[IFLA_IFNAME]) {
1249                nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
1250                name_assign_type = NET_NAME_USER;
1251        } else {
1252                snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
1253                name_assign_type = NET_NAME_ENUM;
1254        }
1255
1256        net = rtnl_link_get_net(src_net, tbp);
1257        if (IS_ERR(net))
1258                return PTR_ERR(net);
1259
1260        peer = rtnl_create_link(net, ifname, name_assign_type,
1261                                &veth_link_ops, tbp, extack);
1262        if (IS_ERR(peer)) {
1263                put_net(net);
1264                return PTR_ERR(peer);
1265        }
1266
1267        if (!ifmp || !tbp[IFLA_ADDRESS])
1268                eth_hw_addr_random(peer);
1269
1270        if (ifmp && (dev->ifindex != 0))
1271                peer->ifindex = ifmp->ifi_index;
1272
1273        peer->gso_max_size = dev->gso_max_size;
1274        peer->gso_max_segs = dev->gso_max_segs;
1275
1276        err = register_netdevice(peer);
1277        put_net(net);
1278        net = NULL;
1279        if (err < 0)
1280                goto err_register_peer;
1281
1282        netif_carrier_off(peer);
1283
1284        err = rtnl_configure_link(peer, ifmp);
1285        if (err < 0)
1286                goto err_configure_peer;
1287
1288        /*
1289         * register dev last
1290         *
 1291         * note that, since we've registered a new device, the dev's name
 1292         * should be re-allocated
1293         */
1294
1295        if (tb[IFLA_ADDRESS] == NULL)
1296                eth_hw_addr_random(dev);
1297
1298        if (tb[IFLA_IFNAME])
1299                nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
1300        else
1301                snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
1302
1303        err = register_netdevice(dev);
1304        if (err < 0)
1305                goto err_register_dev;
1306
1307        netif_carrier_off(dev);
1308
1309        /*
 1310         * tie the devices together
1311         */
1312
1313        priv = netdev_priv(dev);
1314        rcu_assign_pointer(priv->peer, peer);
1315
1316        priv = netdev_priv(peer);
1317        rcu_assign_pointer(priv->peer, dev);
1318
1319        return 0;
1320
1321err_register_dev:
1322        /* nothing to do */
1323err_configure_peer:
1324        unregister_netdevice(peer);
1325        return err;
1326
1327err_register_peer:
1328        free_netdev(peer);
1329        return err;
1330}
1331
1332static void veth_dellink(struct net_device *dev, struct list_head *head)
1333{
1334        struct veth_priv *priv;
1335        struct net_device *peer;
1336
1337        priv = netdev_priv(dev);
1338        peer = rtnl_dereference(priv->peer);
1339
1340        /* Note : dellink() is called from default_device_exit_batch(),
 1341         * before a rcu_synchronize() point. The devices are guaranteed
 1342         * not to be freed before one RCU grace period.
1343         */
1344        RCU_INIT_POINTER(priv->peer, NULL);
1345        unregister_netdevice_queue(dev, head);
1346
1347        if (peer) {
1348                priv = netdev_priv(peer);
1349                RCU_INIT_POINTER(priv->peer, NULL);
1350                unregister_netdevice_queue(peer, head);
1351        }
1352}
1353
1354static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = {
1355        [VETH_INFO_PEER]        = { .len = sizeof(struct ifinfomsg) },
1356};
1357
1358static struct net *veth_get_link_net(const struct net_device *dev)
1359{
1360        struct veth_priv *priv = netdev_priv(dev);
1361        struct net_device *peer = rtnl_dereference(priv->peer);
1362
1363        return peer ? dev_net(peer) : dev_net(dev);
1364}
1365
1366static struct rtnl_link_ops veth_link_ops = {
1367        .kind           = DRV_NAME,
1368        .priv_size      = sizeof(struct veth_priv),
1369        .setup          = veth_setup,
1370        .validate       = veth_validate,
1371        .newlink        = veth_newlink,
1372        .dellink        = veth_dellink,
1373        .policy         = veth_policy,
1374        .maxtype        = VETH_INFO_MAX,
1375        .get_link_net   = veth_get_link_net,
1376};
1377
1378/*
1379 * init/fini
1380 */
1381
1382static __init int veth_init(void)
1383{
1384        return rtnl_link_register(&veth_link_ops);
1385}
1386
1387static __exit void veth_exit(void)
1388{
1389        rtnl_link_unregister(&veth_link_ops);
1390}
1391
1392module_init(veth_init);
1393module_exit(veth_exit);
1394
1395MODULE_DESCRIPTION("Virtual Ethernet Tunnel");
1396MODULE_LICENSE("GPL v2");
1397MODULE_ALIAS_RTNL_LINK(DRV_NAME);
1398