   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *  drivers/net/veth.c
   4 *
   5 *  Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc
   6 *
   7 * Author: Pavel Emelianov <xemul@openvz.org>
   8 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com>
   9 *
  10 */
  11
  12#include <linux/netdevice.h>
  13#include <linux/slab.h>
  14#include <linux/ethtool.h>
  15#include <linux/etherdevice.h>
  16#include <linux/u64_stats_sync.h>
  17
  18#include <net/rtnetlink.h>
  19#include <net/dst.h>
  20#include <net/xfrm.h>
  21#include <net/xdp.h>
  22#include <linux/veth.h>
  23#include <linux/module.h>
  24#include <linux/bpf.h>
  25#include <linux/filter.h>
  26#include <linux/ptr_ring.h>
  27#include <linux/bpf_trace.h>
  28#include <linux/net_tstamp.h>
  29
  30#define DRV_NAME        "veth"
  31#define DRV_VERSION     "1.0"
  32
  33#define VETH_XDP_FLAG           BIT(0)
  34#define VETH_RING_SIZE          256
  35#define VETH_XDP_HEADROOM       (XDP_PACKET_HEADROOM + NET_IP_ALIGN)
  36
  37/* Separating two types of XDP xmit */
  38#define VETH_XDP_TX             BIT(0)
  39#define VETH_XDP_REDIR          BIT(1)
  40
  41#define VETH_XDP_TX_BULK_SIZE   16
  42
  43struct veth_rq_stats {
  44        u64                     xdp_packets;
  45        u64                     xdp_bytes;
  46        u64                     xdp_drops;
  47        struct u64_stats_sync   syncp;
  48};
  49
  50struct veth_rq {
  51        struct napi_struct      xdp_napi;
  52        struct net_device       *dev;
  53        struct bpf_prog __rcu   *xdp_prog;
  54        struct xdp_mem_info     xdp_mem;
  55        struct veth_rq_stats    stats;
  56        bool                    rx_notify_masked;
  57        struct ptr_ring         xdp_ring;
  58        struct xdp_rxq_info     xdp_rxq;
  59};
  60
  61struct veth_priv {
  62        struct net_device __rcu *peer;
  63        atomic64_t              dropped;
  64        struct bpf_prog         *_xdp_prog;
  65        struct veth_rq          *rq;
  66        unsigned int            requested_headroom;
  67};
  68
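     /* XDP_TX frames are batched on the stack during a NAPI poll and pushed
      * to the peer in bursts of at most VETH_XDP_TX_BULK_SIZE frames via
      * veth_xdp_flush_bq().
      */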
  69struct veth_xdp_tx_bq {
  70        struct xdp_frame *q[VETH_XDP_TX_BULK_SIZE];
  71        unsigned int count;
  72};
  73
  74/*
  75 * ethtool interface
  76 */
  77
  78struct veth_q_stat_desc {
  79        char    desc[ETH_GSTRING_LEN];
  80        size_t  offset;
  81};
  82
  83#define VETH_RQ_STAT(m) offsetof(struct veth_rq_stats, m)
  84
  85static const struct veth_q_stat_desc veth_rq_stats_desc[] = {
  86        { "xdp_packets",        VETH_RQ_STAT(xdp_packets) },
  87        { "xdp_bytes",          VETH_RQ_STAT(xdp_bytes) },
  88        { "xdp_drops",          VETH_RQ_STAT(xdp_drops) },
  89};
  90
  91#define VETH_RQ_STATS_LEN       ARRAY_SIZE(veth_rq_stats_desc)
  92
  93static struct {
  94        const char string[ETH_GSTRING_LEN];
  95} ethtool_stats_keys[] = {
  96        { "peer_ifindex" },
  97};
  98
  99static int veth_get_link_ksettings(struct net_device *dev,
 100                                   struct ethtool_link_ksettings *cmd)
 101{
 102        cmd->base.speed         = SPEED_10000;
 103        cmd->base.duplex        = DUPLEX_FULL;
 104        cmd->base.port          = PORT_TP;
 105        cmd->base.autoneg       = AUTONEG_DISABLE;
 106        return 0;
 107}
 108
 109static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 110{
 111        strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
 112        strlcpy(info->version, DRV_VERSION, sizeof(info->version));
 113}
 114
 115static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 116{
 117        char *p = (char *)buf;
 118        int i, j;
 119
  120        switch (stringset) {
 121        case ETH_SS_STATS:
 122                memcpy(p, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
 123                p += sizeof(ethtool_stats_keys);
 124                for (i = 0; i < dev->real_num_rx_queues; i++) {
 125                        for (j = 0; j < VETH_RQ_STATS_LEN; j++) {
 126                                snprintf(p, ETH_GSTRING_LEN,
 127                                         "rx_queue_%u_%.11s",
 128                                         i, veth_rq_stats_desc[j].desc);
 129                                p += ETH_GSTRING_LEN;
 130                        }
 131                }
 132                break;
 133        }
 134}
 135
 136static int veth_get_sset_count(struct net_device *dev, int sset)
 137{
 138        switch (sset) {
 139        case ETH_SS_STATS:
 140                return ARRAY_SIZE(ethtool_stats_keys) +
 141                       VETH_RQ_STATS_LEN * dev->real_num_rx_queues;
 142        default:
 143                return -EOPNOTSUPP;
 144        }
 145}
 146
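     /* Fills the stats buffer in the same order as the strings emitted by
      * veth_get_strings(), so output such as "ethtool -S veth0" (device name
      * here is only an example) looks roughly like:
      *
      *   peer_ifindex
      *   rx_queue_0_xdp_packets
      *   rx_queue_0_xdp_bytes
      *   rx_queue_0_xdp_drops
      *   ...one triplet per real rx queue
      */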
 147static void veth_get_ethtool_stats(struct net_device *dev,
 148                struct ethtool_stats *stats, u64 *data)
 149{
 150        struct veth_priv *priv = netdev_priv(dev);
 151        struct net_device *peer = rtnl_dereference(priv->peer);
 152        int i, j, idx;
 153
 154        data[0] = peer ? peer->ifindex : 0;
 155        idx = 1;
 156        for (i = 0; i < dev->real_num_rx_queues; i++) {
 157                const struct veth_rq_stats *rq_stats = &priv->rq[i].stats;
 158                const void *stats_base = (void *)rq_stats;
 159                unsigned int start;
 160                size_t offset;
 161
 162                do {
 163                        start = u64_stats_fetch_begin_irq(&rq_stats->syncp);
 164                        for (j = 0; j < VETH_RQ_STATS_LEN; j++) {
 165                                offset = veth_rq_stats_desc[j].offset;
 166                                data[idx + j] = *(u64 *)(stats_base + offset);
 167                        }
 168                } while (u64_stats_fetch_retry_irq(&rq_stats->syncp, start));
 169                idx += VETH_RQ_STATS_LEN;
 170        }
 171}
 172
 173static const struct ethtool_ops veth_ethtool_ops = {
 174        .get_drvinfo            = veth_get_drvinfo,
 175        .get_link               = ethtool_op_get_link,
 176        .get_strings            = veth_get_strings,
 177        .get_sset_count         = veth_get_sset_count,
 178        .get_ethtool_stats      = veth_get_ethtool_stats,
 179        .get_link_ksettings     = veth_get_link_ksettings,
 180        .get_ts_info            = ethtool_op_get_ts_info,
 181};
 182
 183/* general routines */
 184
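     /* skbs and xdp_frames share the same xdp_ring.  Both pointer types are
      * at least word aligned, so bit 0 (VETH_XDP_FLAG) is free to serve as a
      * tag: xdp_frame pointers are stored with the bit set, skb pointers are
      * stored as-is, and the helpers below encode, decode and free them.
      */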
 185static bool veth_is_xdp_frame(void *ptr)
 186{
 187        return (unsigned long)ptr & VETH_XDP_FLAG;
 188}
 189
 190static void *veth_ptr_to_xdp(void *ptr)
 191{
 192        return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG);
 193}
 194
 195static void *veth_xdp_to_ptr(void *ptr)
 196{
 197        return (void *)((unsigned long)ptr | VETH_XDP_FLAG);
 198}
 199
 200static void veth_ptr_free(void *ptr)
 201{
 202        if (veth_is_xdp_frame(ptr))
 203                xdp_return_frame(veth_ptr_to_xdp(ptr));
 204        else
 205                kfree_skb(ptr);
 206}
 207
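     /* Kick the receive side's NAPI context after new entries have been
      * produced into its xdp_ring.  The smp_mb() pairs with the
      * smp_store_mb() in veth_poll(): either the producer observes
      * rx_notify_masked as cleared and reschedules NAPI, or the poller
      * observes the new ring entries, so a wakeup is never lost.
      */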
 208static void __veth_xdp_flush(struct veth_rq *rq)
 209{
 210        /* Write ptr_ring before reading rx_notify_masked */
 211        smp_mb();
 212        if (!rq->rx_notify_masked) {
 213                rq->rx_notify_masked = true;
 214                napi_schedule(&rq->xdp_napi);
 215        }
 216}
 217
 218static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb)
 219{
 220        if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) {
 221                dev_kfree_skb_any(skb);
 222                return NET_RX_DROP;
 223        }
 224
 225        return NET_RX_SUCCESS;
 226}
 227
 228static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb,
 229                            struct veth_rq *rq, bool xdp)
 230{
 231        return __dev_forward_skb(dev, skb) ?: xdp ?
 232                veth_xdp_rx(rq, skb) :
 233                netif_rx(skb);
 234}
 235
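     /* ndo_start_xmit: hand the skb directly to the peer device.  When the
      * peer has an XDP program on the matching rx queue, the skb is queued
      * on that queue's xdp_ring and the peer's NAPI poller is kicked;
      * otherwise it takes the ordinary netif_rx() path.
      */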
 236static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 237{
 238        struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
 239        struct veth_rq *rq = NULL;
 240        struct net_device *rcv;
 241        int length = skb->len;
 242        bool rcv_xdp = false;
 243        int rxq;
 244
 245        rcu_read_lock();
 246        rcv = rcu_dereference(priv->peer);
 247        if (unlikely(!rcv)) {
 248                kfree_skb(skb);
 249                goto drop;
 250        }
 251
 252        rcv_priv = netdev_priv(rcv);
 253        rxq = skb_get_queue_mapping(skb);
 254        if (rxq < rcv->real_num_rx_queues) {
 255                rq = &rcv_priv->rq[rxq];
 256                rcv_xdp = rcu_access_pointer(rq->xdp_prog);
 257                if (rcv_xdp)
 258                        skb_record_rx_queue(skb, rxq);
 259        }
 260
 261        skb_tx_timestamp(skb);
 262        if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) {
 263                if (!rcv_xdp)
 264                        dev_lstats_add(dev, length);
 265        } else {
 266drop:
 267                atomic64_inc(&priv->dropped);
 268        }
 269
 270        if (rcv_xdp)
 271                __veth_xdp_flush(rq);
 272
 273        rcu_read_unlock();
 274
 275        return NETDEV_TX_OK;
 276}
 277
 278static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
 279{
 280        struct veth_priv *priv = netdev_priv(dev);
 281
 282        dev_lstats_read(dev, packets, bytes);
 283        return atomic64_read(&priv->dropped);
 284}
 285
 286static void veth_stats_rx(struct veth_rq_stats *result, struct net_device *dev)
 287{
 288        struct veth_priv *priv = netdev_priv(dev);
 289        int i;
 290
 291        result->xdp_packets = 0;
 292        result->xdp_bytes = 0;
 293        result->xdp_drops = 0;
 294        for (i = 0; i < dev->num_rx_queues; i++) {
 295                struct veth_rq_stats *stats = &priv->rq[i].stats;
 296                u64 packets, bytes, drops;
 297                unsigned int start;
 298
 299                do {
 300                        start = u64_stats_fetch_begin_irq(&stats->syncp);
 301                        packets = stats->xdp_packets;
 302                        bytes = stats->xdp_bytes;
 303                        drops = stats->xdp_drops;
 304                } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
 305                result->xdp_packets += packets;
 306                result->xdp_bytes += bytes;
 307                result->xdp_drops += drops;
 308        }
 309}
 310
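     /* ndo_get_stats64: each device only counts its own tx path (lstats)
      * and its own XDP rx path, so the peer's counters are folded in (its
      * tx bytes become our rx bytes and vice versa) to report complete
      * totals for this interface.
      */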
 311static void veth_get_stats64(struct net_device *dev,
 312                             struct rtnl_link_stats64 *tot)
 313{
 314        struct veth_priv *priv = netdev_priv(dev);
 315        struct net_device *peer;
 316        struct veth_rq_stats rx;
 317        u64 packets, bytes;
 318
 319        tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes);
 320        tot->tx_bytes = bytes;
 321        tot->tx_packets = packets;
 322
 323        veth_stats_rx(&rx, dev);
 324        tot->rx_dropped = rx.xdp_drops;
 325        tot->rx_bytes = rx.xdp_bytes;
 326        tot->rx_packets = rx.xdp_packets;
 327
 328        rcu_read_lock();
 329        peer = rcu_dereference(priv->peer);
 330        if (peer) {
 331                veth_stats_tx(peer, &packets, &bytes);
 332                tot->rx_bytes += bytes;
 333                tot->rx_packets += packets;
 334
 335                veth_stats_rx(&rx, peer);
 336                tot->tx_bytes += rx.xdp_bytes;
 337                tot->tx_packets += rx.xdp_packets;
 338        }
 339        rcu_read_unlock();
 340}
 341
 342/* fake multicast ability */
 343static void veth_set_multicast_list(struct net_device *dev)
 344{
 345}
 346
 347static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
 348                                      int buflen)
 349{
 350        struct sk_buff *skb;
 351
 352        if (!buflen) {
 353                buflen = SKB_DATA_ALIGN(headroom + len) +
 354                         SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 355        }
 356        skb = build_skb(head, buflen);
 357        if (!skb)
 358                return NULL;
 359
 360        skb_reserve(skb, headroom);
 361        skb_put(skb, len);
 362
 363        return skb;
 364}
 365
 366static int veth_select_rxq(struct net_device *dev)
 367{
 368        return smp_processor_id() % dev->real_num_rx_queues;
 369}
 370
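     /* ndo_xdp_xmit: bulk-transmit xdp_frames to the peer.  Frames are
      * tagged with VETH_XDP_FLAG and produced straight into the chosen peer
      * rq's xdp_ring; frames that exceed the peer's MTU or that do not fit
      * in the ring are dropped.  Returns the number of frames accepted or a
      * negative errno.
      */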
 371static int veth_xdp_xmit(struct net_device *dev, int n,
 372                         struct xdp_frame **frames, u32 flags)
 373{
 374        struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
 375        struct net_device *rcv;
 376        int i, ret, drops = n;
 377        unsigned int max_len;
 378        struct veth_rq *rq;
 379
 380        rcu_read_lock();
 381        if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
 382                ret = -EINVAL;
 383                goto drop;
 384        }
 385
 386        rcv = rcu_dereference(priv->peer);
 387        if (unlikely(!rcv)) {
 388                ret = -ENXIO;
 389                goto drop;
 390        }
 391
 392        rcv_priv = netdev_priv(rcv);
 393        rq = &rcv_priv->rq[veth_select_rxq(rcv)];
 394        /* Non-NULL xdp_prog ensures that xdp_ring is initialized on receive
 395         * side. This means an XDP program is loaded on the peer and the peer
 396         * device is up.
 397         */
 398        if (!rcu_access_pointer(rq->xdp_prog)) {
 399                ret = -ENXIO;
 400                goto drop;
 401        }
 402
 403        drops = 0;
 404        max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN;
 405
 406        spin_lock(&rq->xdp_ring.producer_lock);
 407        for (i = 0; i < n; i++) {
 408                struct xdp_frame *frame = frames[i];
 409                void *ptr = veth_xdp_to_ptr(frame);
 410
 411                if (unlikely(frame->len > max_len ||
 412                             __ptr_ring_produce(&rq->xdp_ring, ptr))) {
 413                        xdp_return_frame_rx_napi(frame);
 414                        drops++;
 415                }
 416        }
 417        spin_unlock(&rq->xdp_ring.producer_lock);
 418
 419        if (flags & XDP_XMIT_FLUSH)
 420                __veth_xdp_flush(rq);
 421
 422        if (likely(!drops)) {
 423                rcu_read_unlock();
 424                return n;
 425        }
 426
 427        ret = n - drops;
 428drop:
 429        rcu_read_unlock();
 430        atomic64_add(drops, &priv->dropped);
 431
 432        return ret;
 433}
 434
 435static void veth_xdp_flush_bq(struct net_device *dev, struct veth_xdp_tx_bq *bq)
 436{
 437        int sent, i, err = 0;
 438
 439        sent = veth_xdp_xmit(dev, bq->count, bq->q, 0);
 440        if (sent < 0) {
 441                err = sent;
 442                sent = 0;
 443                for (i = 0; i < bq->count; i++)
 444                        xdp_return_frame(bq->q[i]);
 445        }
 446        trace_xdp_bulk_tx(dev, sent, bq->count - sent, err);
 447
 448        bq->count = 0;
 449}
 450
 451static void veth_xdp_flush(struct net_device *dev, struct veth_xdp_tx_bq *bq)
 452{
 453        struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
 454        struct net_device *rcv;
 455        struct veth_rq *rq;
 456
 457        rcu_read_lock();
 458        veth_xdp_flush_bq(dev, bq);
 459        rcv = rcu_dereference(priv->peer);
 460        if (unlikely(!rcv))
 461                goto out;
 462
 463        rcv_priv = netdev_priv(rcv);
 464        rq = &rcv_priv->rq[veth_select_rxq(rcv)];
  465        /* xdp_ring is only initialized when the peer has XDP attached */
 466        if (unlikely(!rcu_access_pointer(rq->xdp_prog)))
 467                goto out;
 468
 469        __veth_xdp_flush(rq);
 470out:
 471        rcu_read_unlock();
 472}
 473
 474static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp,
 475                       struct veth_xdp_tx_bq *bq)
 476{
 477        struct xdp_frame *frame = convert_to_xdp_frame(xdp);
 478
 479        if (unlikely(!frame))
 480                return -EOVERFLOW;
 481
 482        if (unlikely(bq->count == VETH_XDP_TX_BULK_SIZE))
 483                veth_xdp_flush_bq(dev, bq);
 484
 485        bq->q[bq->count++] = frame;
 486
 487        return 0;
 488}
 489
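     /* Run the queue's XDP program on one xdp_frame taken from the ring.
      * On XDP_PASS the frame's buffer is rebuilt into an skb with
      * veth_build_skb(); XDP_TX and XDP_REDIRECT hand the buffer off and
      * return NULL, recording in *xdp_xmit which flush veth_poll() must do.
      */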
 490static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
 491                                        struct xdp_frame *frame,
 492                                        unsigned int *xdp_xmit,
 493                                        struct veth_xdp_tx_bq *bq)
 494{
 495        void *hard_start = frame->data - frame->headroom;
 496        void *head = hard_start - sizeof(struct xdp_frame);
 497        int len = frame->len, delta = 0;
 498        struct xdp_frame orig_frame;
 499        struct bpf_prog *xdp_prog;
 500        unsigned int headroom;
 501        struct sk_buff *skb;
 502
 503        rcu_read_lock();
 504        xdp_prog = rcu_dereference(rq->xdp_prog);
 505        if (likely(xdp_prog)) {
 506                struct xdp_buff xdp;
 507                u32 act;
 508
 509                xdp.data_hard_start = hard_start;
 510                xdp.data = frame->data;
 511                xdp.data_end = frame->data + frame->len;
 512                xdp.data_meta = frame->data - frame->metasize;
 513                xdp.rxq = &rq->xdp_rxq;
 514
 515                act = bpf_prog_run_xdp(xdp_prog, &xdp);
 516
 517                switch (act) {
 518                case XDP_PASS:
 519                        delta = frame->data - xdp.data;
 520                        len = xdp.data_end - xdp.data;
 521                        break;
 522                case XDP_TX:
 523                        orig_frame = *frame;
 524                        xdp.data_hard_start = head;
 525                        xdp.rxq->mem = frame->mem;
 526                        if (unlikely(veth_xdp_tx(rq->dev, &xdp, bq) < 0)) {
 527                                trace_xdp_exception(rq->dev, xdp_prog, act);
 528                                frame = &orig_frame;
 529                                goto err_xdp;
 530                        }
 531                        *xdp_xmit |= VETH_XDP_TX;
 532                        rcu_read_unlock();
 533                        goto xdp_xmit;
 534                case XDP_REDIRECT:
 535                        orig_frame = *frame;
 536                        xdp.data_hard_start = head;
 537                        xdp.rxq->mem = frame->mem;
 538                        if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
 539                                frame = &orig_frame;
 540                                goto err_xdp;
 541                        }
 542                        *xdp_xmit |= VETH_XDP_REDIR;
 543                        rcu_read_unlock();
 544                        goto xdp_xmit;
 545                default:
 546                        bpf_warn_invalid_xdp_action(act);
 547                        /* fall through */
 548                case XDP_ABORTED:
 549                        trace_xdp_exception(rq->dev, xdp_prog, act);
 550                        /* fall through */
 551                case XDP_DROP:
 552                        goto err_xdp;
 553                }
 554        }
 555        rcu_read_unlock();
 556
 557        headroom = sizeof(struct xdp_frame) + frame->headroom - delta;
 558        skb = veth_build_skb(head, headroom, len, 0);
 559        if (!skb) {
 560                xdp_return_frame(frame);
 561                goto err;
 562        }
 563
 564        xdp_release_frame(frame);
 565        xdp_scrub_frame(frame);
 566        skb->protocol = eth_type_trans(skb, rq->dev);
 567err:
 568        return skb;
 569err_xdp:
 570        rcu_read_unlock();
 571        xdp_return_frame(frame);
 572xdp_xmit:
 573        return NULL;
 574}
 575
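     /* Run the queue's XDP program on an skb received from the peer.  Shared,
      * cloned or non-linear skbs, and skbs without enough headroom, are first
      * copied into a freshly allocated page so the program can safely adjust
      * head and tail room.
      */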
 576static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb,
 577                                        unsigned int *xdp_xmit,
 578                                        struct veth_xdp_tx_bq *bq)
 579{
 580        u32 pktlen, headroom, act, metalen;
 581        void *orig_data, *orig_data_end;
 582        struct bpf_prog *xdp_prog;
 583        int mac_len, delta, off;
 584        struct xdp_buff xdp;
 585
 586        skb_orphan(skb);
 587
 588        rcu_read_lock();
 589        xdp_prog = rcu_dereference(rq->xdp_prog);
 590        if (unlikely(!xdp_prog)) {
 591                rcu_read_unlock();
 592                goto out;
 593        }
 594
 595        mac_len = skb->data - skb_mac_header(skb);
 596        pktlen = skb->len + mac_len;
 597        headroom = skb_headroom(skb) - mac_len;
 598
 599        if (skb_shared(skb) || skb_head_is_locked(skb) ||
 600            skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) {
 601                struct sk_buff *nskb;
 602                int size, head_off;
 603                void *head, *start;
 604                struct page *page;
 605
 606                size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) +
 607                       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 608                if (size > PAGE_SIZE)
 609                        goto drop;
 610
 611                page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 612                if (!page)
 613                        goto drop;
 614
 615                head = page_address(page);
 616                start = head + VETH_XDP_HEADROOM;
 617                if (skb_copy_bits(skb, -mac_len, start, pktlen)) {
 618                        page_frag_free(head);
 619                        goto drop;
 620                }
 621
 622                nskb = veth_build_skb(head,
 623                                      VETH_XDP_HEADROOM + mac_len, skb->len,
 624                                      PAGE_SIZE);
 625                if (!nskb) {
 626                        page_frag_free(head);
 627                        goto drop;
 628                }
 629
 630                skb_copy_header(nskb, skb);
 631                head_off = skb_headroom(nskb) - skb_headroom(skb);
 632                skb_headers_offset_update(nskb, head_off);
 633                consume_skb(skb);
 634                skb = nskb;
 635        }
 636
 637        xdp.data_hard_start = skb->head;
 638        xdp.data = skb_mac_header(skb);
 639        xdp.data_end = xdp.data + pktlen;
 640        xdp.data_meta = xdp.data;
 641        xdp.rxq = &rq->xdp_rxq;
 642        orig_data = xdp.data;
 643        orig_data_end = xdp.data_end;
 644
 645        act = bpf_prog_run_xdp(xdp_prog, &xdp);
 646
 647        switch (act) {
 648        case XDP_PASS:
 649                break;
 650        case XDP_TX:
 651                get_page(virt_to_page(xdp.data));
 652                consume_skb(skb);
 653                xdp.rxq->mem = rq->xdp_mem;
 654                if (unlikely(veth_xdp_tx(rq->dev, &xdp, bq) < 0)) {
 655                        trace_xdp_exception(rq->dev, xdp_prog, act);
 656                        goto err_xdp;
 657                }
 658                *xdp_xmit |= VETH_XDP_TX;
 659                rcu_read_unlock();
 660                goto xdp_xmit;
 661        case XDP_REDIRECT:
 662                get_page(virt_to_page(xdp.data));
 663                consume_skb(skb);
 664                xdp.rxq->mem = rq->xdp_mem;
 665                if (xdp_do_redirect(rq->dev, &xdp, xdp_prog))
 666                        goto err_xdp;
 667                *xdp_xmit |= VETH_XDP_REDIR;
 668                rcu_read_unlock();
 669                goto xdp_xmit;
 670        default:
 671                bpf_warn_invalid_xdp_action(act);
 672                /* fall through */
 673        case XDP_ABORTED:
 674                trace_xdp_exception(rq->dev, xdp_prog, act);
 675                /* fall through */
 676        case XDP_DROP:
 677                goto drop;
 678        }
 679        rcu_read_unlock();
 680
 681        delta = orig_data - xdp.data;
 682        off = mac_len + delta;
 683        if (off > 0)
 684                __skb_push(skb, off);
 685        else if (off < 0)
 686                __skb_pull(skb, -off);
 687        skb->mac_header -= delta;
 688        off = xdp.data_end - orig_data_end;
 689        if (off != 0)
 690                __skb_put(skb, off);
 691        skb->protocol = eth_type_trans(skb, rq->dev);
 692
 693        metalen = xdp.data - xdp.data_meta;
 694        if (metalen)
 695                skb_metadata_set(skb, metalen);
 696out:
 697        return skb;
 698drop:
 699        rcu_read_unlock();
 700        kfree_skb(skb);
 701        return NULL;
 702err_xdp:
 703        rcu_read_unlock();
 704        page_frag_free(xdp.data);
 705xdp_xmit:
 706        return NULL;
 707}
 708
 709static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit,
 710                        struct veth_xdp_tx_bq *bq)
 711{
 712        int i, done = 0, drops = 0, bytes = 0;
 713
 714        for (i = 0; i < budget; i++) {
 715                void *ptr = __ptr_ring_consume(&rq->xdp_ring);
 716                unsigned int xdp_xmit_one = 0;
 717                struct sk_buff *skb;
 718
 719                if (!ptr)
 720                        break;
 721
 722                if (veth_is_xdp_frame(ptr)) {
 723                        struct xdp_frame *frame = veth_ptr_to_xdp(ptr);
 724
 725                        bytes += frame->len;
 726                        skb = veth_xdp_rcv_one(rq, frame, &xdp_xmit_one, bq);
 727                } else {
 728                        skb = ptr;
 729                        bytes += skb->len;
 730                        skb = veth_xdp_rcv_skb(rq, skb, &xdp_xmit_one, bq);
 731                }
 732                *xdp_xmit |= xdp_xmit_one;
 733
 734                if (skb)
 735                        napi_gro_receive(&rq->xdp_napi, skb);
 736                else if (!xdp_xmit_one)
 737                        drops++;
 738
 739                done++;
 740        }
 741
 742        u64_stats_update_begin(&rq->stats.syncp);
 743        rq->stats.xdp_packets += done;
 744        rq->stats.xdp_bytes += bytes;
 745        rq->stats.xdp_drops += drops;
 746        u64_stats_update_end(&rq->stats.syncp);
 747
 748        return done;
 749}
 750
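     /* NAPI poll handler for one veth_rq.  After napi_complete_done() the
      * ring is checked again under the barrier implied by smp_store_mb();
      * if entries slipped in while rx_notify_masked was being cleared, NAPI
      * is rescheduled here instead of waiting for another producer kick.
      */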
 751static int veth_poll(struct napi_struct *napi, int budget)
 752{
 753        struct veth_rq *rq =
 754                container_of(napi, struct veth_rq, xdp_napi);
 755        unsigned int xdp_xmit = 0;
 756        struct veth_xdp_tx_bq bq;
 757        int done;
 758
 759        bq.count = 0;
 760
 761        xdp_set_return_frame_no_direct();
 762        done = veth_xdp_rcv(rq, budget, &xdp_xmit, &bq);
 763
 764        if (done < budget && napi_complete_done(napi, done)) {
 765                /* Write rx_notify_masked before reading ptr_ring */
 766                smp_store_mb(rq->rx_notify_masked, false);
 767                if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) {
 768                        rq->rx_notify_masked = true;
 769                        napi_schedule(&rq->xdp_napi);
 770                }
 771        }
 772
 773        if (xdp_xmit & VETH_XDP_TX)
 774                veth_xdp_flush(rq->dev, &bq);
 775        if (xdp_xmit & VETH_XDP_REDIR)
 776                xdp_do_flush();
 777        xdp_clear_return_frame_no_direct();
 778
 779        return done;
 780}
 781
 782static int veth_napi_add(struct net_device *dev)
 783{
 784        struct veth_priv *priv = netdev_priv(dev);
 785        int err, i;
 786
 787        for (i = 0; i < dev->real_num_rx_queues; i++) {
 788                struct veth_rq *rq = &priv->rq[i];
 789
 790                err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL);
 791                if (err)
 792                        goto err_xdp_ring;
 793        }
 794
 795        for (i = 0; i < dev->real_num_rx_queues; i++) {
 796                struct veth_rq *rq = &priv->rq[i];
 797
 798                netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
 799                napi_enable(&rq->xdp_napi);
 800        }
 801
 802        return 0;
 803err_xdp_ring:
 804        for (i--; i >= 0; i--)
 805                ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
 806
 807        return err;
 808}
 809
 810static void veth_napi_del(struct net_device *dev)
 811{
 812        struct veth_priv *priv = netdev_priv(dev);
 813        int i;
 814
 815        for (i = 0; i < dev->real_num_rx_queues; i++) {
 816                struct veth_rq *rq = &priv->rq[i];
 817
 818                napi_disable(&rq->xdp_napi);
 819                napi_hash_del(&rq->xdp_napi);
 820        }
 821        synchronize_net();
 822
 823        for (i = 0; i < dev->real_num_rx_queues; i++) {
 824                struct veth_rq *rq = &priv->rq[i];
 825
 826                netif_napi_del(&rq->xdp_napi);
 827                rq->rx_notify_masked = false;
 828                ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
 829        }
 830}
 831
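     /* Register per-queue xdp_rxq_info and bring up NAPI the first time an
      * XDP program is attached (rq[0]'s registration state doubles as the
      * "already set up" marker), then publish the program to every rx queue.
      */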
 832static int veth_enable_xdp(struct net_device *dev)
 833{
 834        struct veth_priv *priv = netdev_priv(dev);
 835        int err, i;
 836
 837        if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) {
 838                for (i = 0; i < dev->real_num_rx_queues; i++) {
 839                        struct veth_rq *rq = &priv->rq[i];
 840
 841                        err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i);
 842                        if (err < 0)
 843                                goto err_rxq_reg;
 844
 845                        err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
 846                                                         MEM_TYPE_PAGE_SHARED,
 847                                                         NULL);
 848                        if (err < 0)
 849                                goto err_reg_mem;
 850
 851                        /* Save original mem info as it can be overwritten */
 852                        rq->xdp_mem = rq->xdp_rxq.mem;
 853                }
 854
 855                err = veth_napi_add(dev);
 856                if (err)
 857                        goto err_rxq_reg;
 858        }
 859
 860        for (i = 0; i < dev->real_num_rx_queues; i++)
 861                rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog);
 862
 863        return 0;
 864err_reg_mem:
 865        xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
 866err_rxq_reg:
 867        for (i--; i >= 0; i--)
 868                xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
 869
 870        return err;
 871}
 872
 873static void veth_disable_xdp(struct net_device *dev)
 874{
 875        struct veth_priv *priv = netdev_priv(dev);
 876        int i;
 877
 878        for (i = 0; i < dev->real_num_rx_queues; i++)
 879                rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);
 880        veth_napi_del(dev);
 881        for (i = 0; i < dev->real_num_rx_queues; i++) {
 882                struct veth_rq *rq = &priv->rq[i];
 883
 884                rq->xdp_rxq.mem = rq->xdp_mem;
 885                xdp_rxq_info_unreg(&rq->xdp_rxq);
 886        }
 887}
 888
 889static int veth_open(struct net_device *dev)
 890{
 891        struct veth_priv *priv = netdev_priv(dev);
 892        struct net_device *peer = rtnl_dereference(priv->peer);
 893        int err;
 894
 895        if (!peer)
 896                return -ENOTCONN;
 897
 898        if (priv->_xdp_prog) {
 899                err = veth_enable_xdp(dev);
 900                if (err)
 901                        return err;
 902        }
 903
 904        if (peer->flags & IFF_UP) {
 905                netif_carrier_on(dev);
 906                netif_carrier_on(peer);
 907        }
 908
 909        return 0;
 910}
 911
 912static int veth_close(struct net_device *dev)
 913{
 914        struct veth_priv *priv = netdev_priv(dev);
 915        struct net_device *peer = rtnl_dereference(priv->peer);
 916
 917        netif_carrier_off(dev);
 918        if (peer)
 919                netif_carrier_off(peer);
 920
 921        if (priv->_xdp_prog)
 922                veth_disable_xdp(dev);
 923
 924        return 0;
 925}
 926
 927static int is_valid_veth_mtu(int mtu)
 928{
 929        return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU;
 930}
 931
 932static int veth_alloc_queues(struct net_device *dev)
 933{
 934        struct veth_priv *priv = netdev_priv(dev);
 935        int i;
 936
 937        priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL);
 938        if (!priv->rq)
 939                return -ENOMEM;
 940
 941        for (i = 0; i < dev->num_rx_queues; i++) {
 942                priv->rq[i].dev = dev;
 943                u64_stats_init(&priv->rq[i].stats.syncp);
 944        }
 945
 946        return 0;
 947}
 948
 949static void veth_free_queues(struct net_device *dev)
 950{
 951        struct veth_priv *priv = netdev_priv(dev);
 952
 953        kfree(priv->rq);
 954}
 955
 956static int veth_dev_init(struct net_device *dev)
 957{
 958        int err;
 959
 960        dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
 961        if (!dev->lstats)
 962                return -ENOMEM;
 963
 964        err = veth_alloc_queues(dev);
 965        if (err) {
 966                free_percpu(dev->lstats);
 967                return err;
 968        }
 969
 970        return 0;
 971}
 972
 973static void veth_dev_free(struct net_device *dev)
 974{
 975        veth_free_queues(dev);
 976        free_percpu(dev->lstats);
 977}
 978
 979#ifdef CONFIG_NET_POLL_CONTROLLER
 980static void veth_poll_controller(struct net_device *dev)
 981{
  982        /* veth only receives frames when its peer sends one.
  983         * Since it has nothing to do with disabling irqs, we are guaranteed
  984         * never to have pending data when we poll for it, so there is
  985         * nothing to do here.
  986         *
  987         * We need this though so netpoll recognizes us as an interface that
  988         * supports polling, which enables bridge devices in virt setups to
  989         * still use netconsole.
 990         */
 991}
 992#endif  /* CONFIG_NET_POLL_CONTROLLER */
 993
 994static int veth_get_iflink(const struct net_device *dev)
 995{
 996        struct veth_priv *priv = netdev_priv(dev);
 997        struct net_device *peer;
 998        int iflink;
 999
1000        rcu_read_lock();
1001        peer = rcu_dereference(priv->peer);
1002        iflink = peer ? peer->ifindex : 0;
1003        rcu_read_unlock();
1004
1005        return iflink;
1006}
1007
1008static netdev_features_t veth_fix_features(struct net_device *dev,
1009                                           netdev_features_t features)
1010{
1011        struct veth_priv *priv = netdev_priv(dev);
1012        struct net_device *peer;
1013
1014        peer = rtnl_dereference(priv->peer);
1015        if (peer) {
1016                struct veth_priv *peer_priv = netdev_priv(peer);
1017
1018                if (peer_priv->_xdp_prog)
1019                        features &= ~NETIF_F_GSO_SOFTWARE;
1020        }
1021
1022        return features;
1023}
1024
1025static void veth_set_rx_headroom(struct net_device *dev, int new_hr)
1026{
1027        struct veth_priv *peer_priv, *priv = netdev_priv(dev);
1028        struct net_device *peer;
1029
1030        if (new_hr < 0)
1031                new_hr = 0;
1032
1033        rcu_read_lock();
1034        peer = rcu_dereference(priv->peer);
1035        if (unlikely(!peer))
1036                goto out;
1037
1038        peer_priv = netdev_priv(peer);
1039        priv->requested_headroom = new_hr;
1040        new_hr = max(priv->requested_headroom, peer_priv->requested_headroom);
1041        dev->needed_headroom = new_hr;
1042        peer->needed_headroom = new_hr;
1043
1044out:
1045        rcu_read_unlock();
1046}
1047
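     /* Attach or detach an XDP program (XDP_SETUP_PROG).  Attaching requires
      * a peer whose MTU still fits in one page after XDP headroom and
      * skb_shared_info, and at least as many local rx queues as the peer has
      * tx queues; software GSO is disabled on the peer while a program is
      * loaded.
      */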
1048static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
1049                        struct netlink_ext_ack *extack)
1050{
1051        struct veth_priv *priv = netdev_priv(dev);
1052        struct bpf_prog *old_prog;
1053        struct net_device *peer;
1054        unsigned int max_mtu;
1055        int err;
1056
1057        old_prog = priv->_xdp_prog;
1058        priv->_xdp_prog = prog;
1059        peer = rtnl_dereference(priv->peer);
1060
1061        if (prog) {
1062                if (!peer) {
1063                        NL_SET_ERR_MSG_MOD(extack, "Cannot set XDP when peer is detached");
1064                        err = -ENOTCONN;
1065                        goto err;
1066                }
1067
1068                max_mtu = PAGE_SIZE - VETH_XDP_HEADROOM -
1069                          peer->hard_header_len -
1070                          SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1071                if (peer->mtu > max_mtu) {
1072                        NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP");
1073                        err = -ERANGE;
1074                        goto err;
1075                }
1076
1077                if (dev->real_num_rx_queues < peer->real_num_tx_queues) {
1078                        NL_SET_ERR_MSG_MOD(extack, "XDP expects number of rx queues not less than peer tx queues");
1079                        err = -ENOSPC;
1080                        goto err;
1081                }
1082
1083                if (dev->flags & IFF_UP) {
1084                        err = veth_enable_xdp(dev);
1085                        if (err) {
1086                                NL_SET_ERR_MSG_MOD(extack, "Setup for XDP failed");
1087                                goto err;
1088                        }
1089                }
1090
1091                if (!old_prog) {
1092                        peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
1093                        peer->max_mtu = max_mtu;
1094                }
1095        }
1096
1097        if (old_prog) {
1098                if (!prog) {
1099                        if (dev->flags & IFF_UP)
1100                                veth_disable_xdp(dev);
1101
1102                        if (peer) {
1103                                peer->hw_features |= NETIF_F_GSO_SOFTWARE;
1104                                peer->max_mtu = ETH_MAX_MTU;
1105                        }
1106                }
1107                bpf_prog_put(old_prog);
1108        }
1109
1110        if ((!!old_prog ^ !!prog) && peer)
1111                netdev_update_features(peer);
1112
1113        return 0;
1114err:
1115        priv->_xdp_prog = old_prog;
1116
1117        return err;
1118}
1119
1120static u32 veth_xdp_query(struct net_device *dev)
1121{
1122        struct veth_priv *priv = netdev_priv(dev);
1123        const struct bpf_prog *xdp_prog;
1124
1125        xdp_prog = priv->_xdp_prog;
1126        if (xdp_prog)
1127                return xdp_prog->aux->id;
1128
1129        return 0;
1130}
1131
1132static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1133{
1134        switch (xdp->command) {
1135        case XDP_SETUP_PROG:
1136                return veth_xdp_set(dev, xdp->prog, xdp->extack);
1137        case XDP_QUERY_PROG:
1138                xdp->prog_id = veth_xdp_query(dev);
1139                return 0;
1140        default:
1141                return -EINVAL;
1142        }
1143}
1144
1145static const struct net_device_ops veth_netdev_ops = {
1146        .ndo_init            = veth_dev_init,
1147        .ndo_open            = veth_open,
1148        .ndo_stop            = veth_close,
1149        .ndo_start_xmit      = veth_xmit,
1150        .ndo_get_stats64     = veth_get_stats64,
1151        .ndo_set_rx_mode     = veth_set_multicast_list,
1152        .ndo_set_mac_address = eth_mac_addr,
1153#ifdef CONFIG_NET_POLL_CONTROLLER
1154        .ndo_poll_controller    = veth_poll_controller,
1155#endif
1156        .ndo_get_iflink         = veth_get_iflink,
1157        .ndo_fix_features       = veth_fix_features,
1158        .ndo_features_check     = passthru_features_check,
1159        .ndo_set_rx_headroom    = veth_set_rx_headroom,
1160        .ndo_bpf                = veth_xdp,
1161        .ndo_xdp_xmit           = veth_xdp_xmit,
1162};
1163
1164#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
1165                       NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \
1166                       NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \
1167                       NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \
1168                       NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX )
1169
1170static void veth_setup(struct net_device *dev)
1171{
1172        ether_setup(dev);
1173
1174        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1175        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1176        dev->priv_flags |= IFF_NO_QUEUE;
1177        dev->priv_flags |= IFF_PHONY_HEADROOM;
1178
1179        dev->netdev_ops = &veth_netdev_ops;
1180        dev->ethtool_ops = &veth_ethtool_ops;
1181        dev->features |= NETIF_F_LLTX;
1182        dev->features |= VETH_FEATURES;
1183        dev->vlan_features = dev->features &
1184                             ~(NETIF_F_HW_VLAN_CTAG_TX |
1185                               NETIF_F_HW_VLAN_STAG_TX |
1186                               NETIF_F_HW_VLAN_CTAG_RX |
1187                               NETIF_F_HW_VLAN_STAG_RX);
1188        dev->needs_free_netdev = true;
1189        dev->priv_destructor = veth_dev_free;
1190        dev->max_mtu = ETH_MAX_MTU;
1191
1192        dev->hw_features = VETH_FEATURES;
1193        dev->hw_enc_features = VETH_FEATURES;
1194        dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
1195}
1196
1197/*
1198 * netlink interface
1199 */
1200
1201static int veth_validate(struct nlattr *tb[], struct nlattr *data[],
1202                         struct netlink_ext_ack *extack)
1203{
1204        if (tb[IFLA_ADDRESS]) {
1205                if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1206                        return -EINVAL;
1207                if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1208                        return -EADDRNOTAVAIL;
1209        }
1210        if (tb[IFLA_MTU]) {
1211                if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU])))
1212                        return -EINVAL;
1213        }
1214        return 0;
1215}
1216
1217static struct rtnl_link_ops veth_link_ops;
1218
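     /* Devices are created in pairs over rtnetlink.  With iproute2 this is
      * typically something like (interface names here are only examples):
      *
      *   ip link add veth0 type veth peer name veth1
      *   ip link set veth1 netns <namespace>
      *
      * veth_newlink() registers the peer first, then the device itself, and
      * finally points the two priv->peer pointers at each other.
      */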
1219static int veth_newlink(struct net *src_net, struct net_device *dev,
1220                        struct nlattr *tb[], struct nlattr *data[],
1221                        struct netlink_ext_ack *extack)
1222{
1223        int err;
1224        struct net_device *peer;
1225        struct veth_priv *priv;
1226        char ifname[IFNAMSIZ];
1227        struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
1228        unsigned char name_assign_type;
1229        struct ifinfomsg *ifmp;
1230        struct net *net;
1231
1232        /*
1233         * create and register peer first
1234         */
1235        if (data != NULL && data[VETH_INFO_PEER] != NULL) {
1236                struct nlattr *nla_peer;
1237
1238                nla_peer = data[VETH_INFO_PEER];
1239                ifmp = nla_data(nla_peer);
1240                err = rtnl_nla_parse_ifla(peer_tb,
1241                                          nla_data(nla_peer) + sizeof(struct ifinfomsg),
1242                                          nla_len(nla_peer) - sizeof(struct ifinfomsg),
1243                                          NULL);
1244                if (err < 0)
1245                        return err;
1246
1247                err = veth_validate(peer_tb, NULL, extack);
1248                if (err < 0)
1249                        return err;
1250
1251                tbp = peer_tb;
1252        } else {
1253                ifmp = NULL;
1254                tbp = tb;
1255        }
1256
1257        if (ifmp && tbp[IFLA_IFNAME]) {
1258                nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
1259                name_assign_type = NET_NAME_USER;
1260        } else {
1261                snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
1262                name_assign_type = NET_NAME_ENUM;
1263        }
1264
1265        net = rtnl_link_get_net(src_net, tbp);
1266        if (IS_ERR(net))
1267                return PTR_ERR(net);
1268
1269        peer = rtnl_create_link(net, ifname, name_assign_type,
1270                                &veth_link_ops, tbp, extack);
1271        if (IS_ERR(peer)) {
1272                put_net(net);
1273                return PTR_ERR(peer);
1274        }
1275
1276        if (!ifmp || !tbp[IFLA_ADDRESS])
1277                eth_hw_addr_random(peer);
1278
1279        if (ifmp && (dev->ifindex != 0))
1280                peer->ifindex = ifmp->ifi_index;
1281
1282        peer->gso_max_size = dev->gso_max_size;
1283        peer->gso_max_segs = dev->gso_max_segs;
1284
1285        err = register_netdevice(peer);
1286        put_net(net);
1287        net = NULL;
1288        if (err < 0)
1289                goto err_register_peer;
1290
1291        netif_carrier_off(peer);
1292
1293        err = rtnl_configure_link(peer, ifmp);
1294        if (err < 0)
1295                goto err_configure_peer;
1296
1297        /*
1298         * register dev last
1299         *
 1300         * note that, since we've registered a new device, the dev's name
 1301         * may need to be re-allocated
1302         */
1303
1304        if (tb[IFLA_ADDRESS] == NULL)
1305                eth_hw_addr_random(dev);
1306
1307        if (tb[IFLA_IFNAME])
1308                nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
1309        else
1310                snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
1311
1312        err = register_netdevice(dev);
1313        if (err < 0)
1314                goto err_register_dev;
1315
1316        netif_carrier_off(dev);
1317
1318        /*
 1319         * tie the devices together
1320         */
1321
1322        priv = netdev_priv(dev);
1323        rcu_assign_pointer(priv->peer, peer);
1324
1325        priv = netdev_priv(peer);
1326        rcu_assign_pointer(priv->peer, dev);
1327
1328        return 0;
1329
1330err_register_dev:
1331        /* nothing to do */
1332err_configure_peer:
1333        unregister_netdevice(peer);
1334        return err;
1335
1336err_register_peer:
1337        free_netdev(peer);
1338        return err;
1339}
1340
1341static void veth_dellink(struct net_device *dev, struct list_head *head)
1342{
1343        struct veth_priv *priv;
1344        struct net_device *peer;
1345
1346        priv = netdev_priv(dev);
1347        peer = rtnl_dereference(priv->peer);
1348
 1349        /* Note: dellink() is called from default_device_exit_batch(),
 1350         * before an rcu_synchronize() point, so the devices are guaranteed
 1351         * not to be freed for at least one RCU grace period.
1352         */
1353        RCU_INIT_POINTER(priv->peer, NULL);
1354        unregister_netdevice_queue(dev, head);
1355
1356        if (peer) {
1357                priv = netdev_priv(peer);
1358                RCU_INIT_POINTER(priv->peer, NULL);
1359                unregister_netdevice_queue(peer, head);
1360        }
1361}
1362
1363static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = {
1364        [VETH_INFO_PEER]        = { .len = sizeof(struct ifinfomsg) },
1365};
1366
1367static struct net *veth_get_link_net(const struct net_device *dev)
1368{
1369        struct veth_priv *priv = netdev_priv(dev);
1370        struct net_device *peer = rtnl_dereference(priv->peer);
1371
1372        return peer ? dev_net(peer) : dev_net(dev);
1373}
1374
1375static struct rtnl_link_ops veth_link_ops = {
1376        .kind           = DRV_NAME,
1377        .priv_size      = sizeof(struct veth_priv),
1378        .setup          = veth_setup,
1379        .validate       = veth_validate,
1380        .newlink        = veth_newlink,
1381        .dellink        = veth_dellink,
1382        .policy         = veth_policy,
1383        .maxtype        = VETH_INFO_MAX,
1384        .get_link_net   = veth_get_link_net,
1385};
1386
1387/*
1388 * init/fini
1389 */
1390
1391static __init int veth_init(void)
1392{
1393        return rtnl_link_register(&veth_link_ops);
1394}
1395
1396static __exit void veth_exit(void)
1397{
1398        rtnl_link_unregister(&veth_link_ops);
1399}
1400
1401module_init(veth_init);
1402module_exit(veth_exit);
1403
1404MODULE_DESCRIPTION("Virtual Ethernet Tunnel");
1405MODULE_LICENSE("GPL v2");
1406MODULE_ALIAS_RTNL_LINK(DRV_NAME);
1407