linux/drivers/net/ipvlan/ipvlan_main.c
   1/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
   2 *
   3 * This program is free software; you can redistribute it and/or
   4 * modify it under the terms of the GNU General Public License as
   5 * published by the Free Software Foundation; either version 2 of
   6 * the License, or (at your option) any later version.
   7 *
   8 */
   9
  10#include "ipvlan.h"
  11
  12static unsigned int ipvlan_netid __read_mostly;
  13
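    /* Per-namespace state: ipvl_nf_hook_refcnt counts how many L3S users
     * (ports) in this netns currently need the netfilter LOCAL_IN hooks,
     * so the hooks are registered only once per namespace.
     */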
  14struct ipvlan_netns {
  15        unsigned int ipvl_nf_hook_refcnt;
  16};
  17
  18static const struct nf_hook_ops ipvl_nfops[] = {
  19        {
  20                .hook     = ipvlan_nf_input,
  21                .pf       = NFPROTO_IPV4,
  22                .hooknum  = NF_INET_LOCAL_IN,
  23                .priority = INT_MAX,
  24        },
  25#if IS_ENABLED(CONFIG_IPV6)
  26        {
  27                .hook     = ipvlan_nf_input,
  28                .pf       = NFPROTO_IPV6,
  29                .hooknum  = NF_INET_LOCAL_IN,
  30                .priority = INT_MAX,
  31        },
  32#endif
  33};
  34
  35static const struct l3mdev_ops ipvl_l3mdev_ops = {
  36        .l3mdev_l3_rcv = ipvlan_l3_rcv,
  37};
  38
  39static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
  40{
  41        ipvlan->dev->mtu = dev->mtu;
  42}
  43
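    /* Register the IPv4/IPv6 LOCAL_IN hooks used by L3S mode for this
     * namespace. The registration is refcounted, so only the first user
     * actually installs the hooks.
     */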
  44static int ipvlan_register_nf_hook(struct net *net)
  45{
  46        struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
  47        int err = 0;
  48
  49        if (!vnet->ipvl_nf_hook_refcnt) {
  50                err = nf_register_net_hooks(net, ipvl_nfops,
  51                                            ARRAY_SIZE(ipvl_nfops));
  52                if (!err)
  53                        vnet->ipvl_nf_hook_refcnt = 1;
  54        } else {
  55                vnet->ipvl_nf_hook_refcnt++;
  56        }
  57
  58        return err;
  59}
  60
  61static void ipvlan_unregister_nf_hook(struct net *net)
  62{
  63        struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
  64
  65        if (WARN_ON(!vnet->ipvl_nf_hook_refcnt))
  66                return;
  67
  68        vnet->ipvl_nf_hook_refcnt--;
  69        if (!vnet->ipvl_nf_hook_refcnt)
  70                nf_unregister_net_hooks(net, ipvl_nfops,
  71                                        ARRAY_SIZE(ipvl_nfops));
  72}
  73
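    /* Switch the port to a new mode under RTNL: toggle IFF_NOARP on every
     * slave (L3/L3S slaves do not ARP) and install or remove the l3mdev
     * ops and netfilter hook when entering or leaving L3S. Flag changes
     * made before a failure are rolled back.
     */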
  74static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
  75{
  76        struct ipvl_dev *ipvlan;
  77        struct net_device *mdev = port->dev;
  78        unsigned int flags;
  79        int err;
  80
  81        ASSERT_RTNL();
  82        if (port->mode != nval) {
  83                list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
  84                        flags = ipvlan->dev->flags;
  85                        if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) {
  86                                err = dev_change_flags(ipvlan->dev,
  87                                                       flags | IFF_NOARP);
  88                        } else {
  89                                err = dev_change_flags(ipvlan->dev,
  90                                                       flags & ~IFF_NOARP);
  91                        }
  92                        if (unlikely(err))
  93                                goto fail;
  94                }
  95                if (nval == IPVLAN_MODE_L3S) {
  96                        /* New mode is L3S */
  97                        err = ipvlan_register_nf_hook(read_pnet(&port->pnet));
  98                        if (!err) {
  99                                mdev->l3mdev_ops = &ipvl_l3mdev_ops;
 100                                mdev->priv_flags |= IFF_L3MDEV_MASTER;
 101                        } else
 102                                goto fail;
 103                } else if (port->mode == IPVLAN_MODE_L3S) {
 104                        /* Old mode was L3S */
 105                        mdev->priv_flags &= ~IFF_L3MDEV_MASTER;
 106                        ipvlan_unregister_nf_hook(read_pnet(&port->pnet));
 107                        mdev->l3mdev_ops = NULL;
 108                }
 109                port->mode = nval;
 110        }
 111        return 0;
 112
 113fail:
 114        /* Undo the flags changes that have been done so far. */
 115        list_for_each_entry_continue_reverse(ipvlan, &port->ipvlans, pnode) {
 116                flags = ipvlan->dev->flags;
 117                if (port->mode == IPVLAN_MODE_L3 ||
 118                    port->mode == IPVLAN_MODE_L3S)
 119                        dev_change_flags(ipvlan->dev, flags | IFF_NOARP);
 120                else
 121                        dev_change_flags(ipvlan->dev, flags & ~IFF_NOARP);
 122        }
 123
 124        return err;
 125}
 126
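    /* Allocate and initialise the per-master port (default mode L3, empty
     * address hash, multicast backlog and work item) and claim the
     * master's rx_handler.
     */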
 127static int ipvlan_port_create(struct net_device *dev)
 128{
 129        struct ipvl_port *port;
 130        int err, idx;
 131
 132        port = kzalloc(sizeof(struct ipvl_port), GFP_KERNEL);
 133        if (!port)
 134                return -ENOMEM;
 135
 136        write_pnet(&port->pnet, dev_net(dev));
 137        port->dev = dev;
 138        port->mode = IPVLAN_MODE_L3;
 139        INIT_LIST_HEAD(&port->ipvlans);
 140        for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++)
 141                INIT_HLIST_HEAD(&port->hlhead[idx]);
 142
 143        skb_queue_head_init(&port->backlog);
 144        INIT_WORK(&port->wq, ipvlan_process_multicast);
 145        ida_init(&port->ida);
 146        port->dev_id_start = 1;
 147
 148        err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port);
 149        if (err)
 150                goto err;
 151
 152        return 0;
 153
 154err:
 155        kfree(port);
 156        return err;
 157}
 158
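    /* Tear down the port: drop L3S state if it was active, release the
     * rx_handler, flush the multicast backlog and free the port.
     */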
 159static void ipvlan_port_destroy(struct net_device *dev)
 160{
 161        struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
 162        struct sk_buff *skb;
 163
 164        if (port->mode == IPVLAN_MODE_L3S) {
 165                dev->priv_flags &= ~IFF_L3MDEV_MASTER;
 166                ipvlan_unregister_nf_hook(dev_net(dev));
 167                dev->l3mdev_ops = NULL;
 168        }
 169        netdev_rx_handler_unregister(dev);
 170        cancel_work_sync(&port->wq);
 171        while ((skb = __skb_dequeue(&port->backlog)) != NULL) {
 172                if (skb->dev)
 173                        dev_put(skb->dev);
 174                kfree_skb(skb);
 175        }
 176        ida_destroy(&port->ida);
 177        kfree(port);
 178}
 179
 180#define IPVLAN_FEATURES \
 181        (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
 182         NETIF_F_GSO | NETIF_F_TSO | NETIF_F_GSO_ROBUST | \
 183         NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
 184         NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
 185
 186#define IPVLAN_STATE_MASK \
 187        ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))
 188
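    /* ndo_init: inherit link state, features and GSO limits from the
     * physical device, allocate per-CPU stats, and create the port on the
     * master if this is its first ipvlan slave.
     */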
 189static int ipvlan_init(struct net_device *dev)
 190{
 191        struct ipvl_dev *ipvlan = netdev_priv(dev);
 192        struct net_device *phy_dev = ipvlan->phy_dev;
 193        struct ipvl_port *port;
 194        int err;
 195
 196        dev->state = (dev->state & ~IPVLAN_STATE_MASK) |
 197                     (phy_dev->state & IPVLAN_STATE_MASK);
 198        dev->features = phy_dev->features & IPVLAN_FEATURES;
 199        dev->features |= NETIF_F_LLTX | NETIF_F_VLAN_CHALLENGED;
 200        dev->gso_max_size = phy_dev->gso_max_size;
 201        dev->gso_max_segs = phy_dev->gso_max_segs;
 202        dev->hard_header_len = phy_dev->hard_header_len;
 203
 204        netdev_lockdep_set_classes(dev);
 205
 206        ipvlan->pcpu_stats = netdev_alloc_pcpu_stats(struct ipvl_pcpu_stats);
 207        if (!ipvlan->pcpu_stats)
 208                return -ENOMEM;
 209
 210        if (!netif_is_ipvlan_port(phy_dev)) {
 211                err = ipvlan_port_create(phy_dev);
 212                if (err < 0) {
 213                        free_percpu(ipvlan->pcpu_stats);
 214                        return err;
 215                }
 216        }
 217        port = ipvlan_port_get_rtnl(phy_dev);
 218        port->count += 1;
 219        return 0;
 220}
 221
 222static void ipvlan_uninit(struct net_device *dev)
 223{
 224        struct ipvl_dev *ipvlan = netdev_priv(dev);
 225        struct net_device *phy_dev = ipvlan->phy_dev;
 226        struct ipvl_port *port;
 227
 228        free_percpu(ipvlan->pcpu_stats);
 229
 230        port = ipvlan_port_get_rtnl(phy_dev);
 231        port->count -= 1;
 232        if (!port->count)
 233                ipvlan_port_destroy(port->dev);
 234}
 235
 236static int ipvlan_open(struct net_device *dev)
 237{
 238        struct ipvl_dev *ipvlan = netdev_priv(dev);
 239        struct net_device *phy_dev = ipvlan->phy_dev;
 240        struct ipvl_addr *addr;
 241
 242        if (ipvlan->port->mode == IPVLAN_MODE_L3 ||
 243            ipvlan->port->mode == IPVLAN_MODE_L3S)
 244                dev->flags |= IFF_NOARP;
 245        else
 246                dev->flags &= ~IFF_NOARP;
 247
 248        rcu_read_lock();
 249        list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
 250                ipvlan_ht_addr_add(ipvlan, addr);
 251        rcu_read_unlock();
 252
 253        return dev_uc_add(phy_dev, phy_dev->dev_addr);
 254}
 255
 256static int ipvlan_stop(struct net_device *dev)
 257{
 258        struct ipvl_dev *ipvlan = netdev_priv(dev);
 259        struct net_device *phy_dev = ipvlan->phy_dev;
 260        struct ipvl_addr *addr;
 261
 262        dev_uc_unsync(phy_dev, dev);
 263        dev_mc_unsync(phy_dev, dev);
 264
 265        dev_uc_del(phy_dev, phy_dev->dev_addr);
 266
 267        rcu_read_lock();
 268        list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
 269                ipvlan_ht_addr_del(addr);
 270        rcu_read_unlock();
 271
 272        return 0;
 273}
 274
 275static netdev_tx_t ipvlan_start_xmit(struct sk_buff *skb,
 276                                     struct net_device *dev)
 277{
 278        const struct ipvl_dev *ipvlan = netdev_priv(dev);
 279        int skblen = skb->len;
 280        int ret;
 281
 282        ret = ipvlan_queue_xmit(skb, dev);
 283        if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
 284                struct ipvl_pcpu_stats *pcptr;
 285
 286                pcptr = this_cpu_ptr(ipvlan->pcpu_stats);
 287
 288                u64_stats_update_begin(&pcptr->syncp);
 289                pcptr->tx_pkts++;
 290                pcptr->tx_bytes += skblen;
 291                u64_stats_update_end(&pcptr->syncp);
 292        } else {
 293                this_cpu_inc(ipvlan->pcpu_stats->tx_drps);
 294        }
 295        return ret;
 296}
 297
 298static netdev_features_t ipvlan_fix_features(struct net_device *dev,
 299                                             netdev_features_t features)
 300{
 301        struct ipvl_dev *ipvlan = netdev_priv(dev);
 302
 303        return features & (ipvlan->sfeatures | ~IPVLAN_FEATURES);
 304}
 305
 306static void ipvlan_change_rx_flags(struct net_device *dev, int change)
 307{
 308        struct ipvl_dev *ipvlan = netdev_priv(dev);
 309        struct net_device *phy_dev = ipvlan->phy_dev;
 310
 311        if (change & IFF_ALLMULTI)
 312                dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI ? 1 : -1);
 313}
 314
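    /* ndo_set_rx_mode: build a hashed MAC filter from the slave's multicast
     * list (plus broadcast), or accept all multicast traffic in
     * promiscuous/allmulti mode, then sync the unicast and multicast
     * address lists to the physical device.
     */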
 315static void ipvlan_set_multicast_mac_filter(struct net_device *dev)
 316{
 317        struct ipvl_dev *ipvlan = netdev_priv(dev);
 318
 319        if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
 320                bitmap_fill(ipvlan->mac_filters, IPVLAN_MAC_FILTER_SIZE);
 321        } else {
 322                struct netdev_hw_addr *ha;
 323                DECLARE_BITMAP(mc_filters, IPVLAN_MAC_FILTER_SIZE);
 324
 325                bitmap_zero(mc_filters, IPVLAN_MAC_FILTER_SIZE);
 326                netdev_for_each_mc_addr(ha, dev)
 327                        __set_bit(ipvlan_mac_hash(ha->addr), mc_filters);
 328
 329                /* Turn on the broadcast bit irrespective of address family,
 330                 * since broadcast handling is deferred to a work-queue and
 331                 * hence has no impact on fast-path processing.
 332                 */
 333                __set_bit(ipvlan_mac_hash(dev->broadcast), mc_filters);
 334
 335                bitmap_copy(ipvlan->mac_filters, mc_filters,
 336                            IPVLAN_MAC_FILTER_SIZE);
 337        }
 338        dev_uc_sync(ipvlan->phy_dev, dev);
 339        dev_mc_sync(ipvlan->phy_dev, dev);
 340}
 341
 342static void ipvlan_get_stats64(struct net_device *dev,
 343                               struct rtnl_link_stats64 *s)
 344{
 345        struct ipvl_dev *ipvlan = netdev_priv(dev);
 346
 347        if (ipvlan->pcpu_stats) {
 348                struct ipvl_pcpu_stats *pcptr;
 349                u64 rx_pkts, rx_bytes, rx_mcast, tx_pkts, tx_bytes;
 350                u32 rx_errs = 0, tx_drps = 0;
 351                u32 strt;
 352                int idx;
 353
 354                for_each_possible_cpu(idx) {
 355                        pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx);
 356                        do {
 357                                strt = u64_stats_fetch_begin_irq(&pcptr->syncp);
 358                                rx_pkts = pcptr->rx_pkts;
 359                                rx_bytes = pcptr->rx_bytes;
 360                                rx_mcast = pcptr->rx_mcast;
 361                                tx_pkts = pcptr->tx_pkts;
 362                                tx_bytes = pcptr->tx_bytes;
 363                        } while (u64_stats_fetch_retry_irq(&pcptr->syncp,
 364                                                           strt));
 365
 366                        s->rx_packets += rx_pkts;
 367                        s->rx_bytes += rx_bytes;
 368                        s->multicast += rx_mcast;
 369                        s->tx_packets += tx_pkts;
 370                        s->tx_bytes += tx_bytes;
 371
 372                        /* u32 values are updated without syncp protection. */
 373                        rx_errs += pcptr->rx_errs;
 374                        tx_drps += pcptr->tx_drps;
 375                }
 376                s->rx_errors = rx_errs;
 377                s->rx_dropped = rx_errs;
 378                s->tx_dropped = tx_drps;
 379        }
 380}
 381
 382static int ipvlan_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 383{
 384        struct ipvl_dev *ipvlan = netdev_priv(dev);
 385        struct net_device *phy_dev = ipvlan->phy_dev;
 386
 387        return vlan_vid_add(phy_dev, proto, vid);
 388}
 389
 390static int ipvlan_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
 391                                   u16 vid)
 392{
 393        struct ipvl_dev *ipvlan = netdev_priv(dev);
 394        struct net_device *phy_dev = ipvlan->phy_dev;
 395
 396        vlan_vid_del(phy_dev, proto, vid);
 397        return 0;
 398}
 399
 400static int ipvlan_get_iflink(const struct net_device *dev)
 401{
 402        struct ipvl_dev *ipvlan = netdev_priv(dev);
 403
 404        return ipvlan->phy_dev->ifindex;
 405}
 406
 407static const struct net_device_ops ipvlan_netdev_ops = {
 408        .ndo_init               = ipvlan_init,
 409        .ndo_uninit             = ipvlan_uninit,
 410        .ndo_open               = ipvlan_open,
 411        .ndo_stop               = ipvlan_stop,
 412        .ndo_start_xmit         = ipvlan_start_xmit,
 413        .ndo_fix_features       = ipvlan_fix_features,
 414        .ndo_change_rx_flags    = ipvlan_change_rx_flags,
 415        .ndo_set_rx_mode        = ipvlan_set_multicast_mac_filter,
 416        .ndo_get_stats64        = ipvlan_get_stats64,
 417        .ndo_vlan_rx_add_vid    = ipvlan_vlan_rx_add_vid,
 418        .ndo_vlan_rx_kill_vid   = ipvlan_vlan_rx_kill_vid,
 419        .ndo_get_iflink         = ipvlan_get_iflink,
 420};
 421
 422static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev,
 423                              unsigned short type, const void *daddr,
 424                              const void *saddr, unsigned len)
 425{
 426        const struct ipvl_dev *ipvlan = netdev_priv(dev);
 427        struct net_device *phy_dev = ipvlan->phy_dev;
 428
 429        /* TODO Probably use a different field than dev_addr so that the
 430         * mac-address on the virtual device is portable and can be carried
 431         * while the packets use the mac-addr on the physical device.
 432         */
 433        return dev_hard_header(skb, phy_dev, type, daddr,
 434                               saddr ? : phy_dev->dev_addr, len);
 435}
 436
 437static const struct header_ops ipvlan_header_ops = {
 438        .create         = ipvlan_hard_header,
 439        .parse          = eth_header_parse,
 440        .cache          = eth_header_cache,
 441        .cache_update   = eth_header_cache_update,
 442};
 443
 444static bool netif_is_ipvlan(const struct net_device *dev)
 445{
 446        /* both ipvlan and ipvtap devices use the same netdev_ops */
 447        return dev->netdev_ops == &ipvlan_netdev_ops;
 448}
 449
 450static int ipvlan_ethtool_get_link_ksettings(struct net_device *dev,
 451                                             struct ethtool_link_ksettings *cmd)
 452{
 453        const struct ipvl_dev *ipvlan = netdev_priv(dev);
 454
 455        return __ethtool_get_link_ksettings(ipvlan->phy_dev, cmd);
 456}
 457
 458static void ipvlan_ethtool_get_drvinfo(struct net_device *dev,
 459                                       struct ethtool_drvinfo *drvinfo)
 460{
 461        strlcpy(drvinfo->driver, IPVLAN_DRV, sizeof(drvinfo->driver));
 462        strlcpy(drvinfo->version, IPV_DRV_VER, sizeof(drvinfo->version));
 463}
 464
 465static u32 ipvlan_ethtool_get_msglevel(struct net_device *dev)
 466{
 467        const struct ipvl_dev *ipvlan = netdev_priv(dev);
 468
 469        return ipvlan->msg_enable;
 470}
 471
 472static void ipvlan_ethtool_set_msglevel(struct net_device *dev, u32 value)
 473{
 474        struct ipvl_dev *ipvlan = netdev_priv(dev);
 475
 476        ipvlan->msg_enable = value;
 477}
 478
 479static const struct ethtool_ops ipvlan_ethtool_ops = {
 480        .get_link       = ethtool_op_get_link,
 481        .get_link_ksettings     = ipvlan_ethtool_get_link_ksettings,
 482        .get_drvinfo    = ipvlan_ethtool_get_drvinfo,
 483        .get_msglevel   = ipvlan_ethtool_get_msglevel,
 484        .set_msglevel   = ipvlan_ethtool_set_msglevel,
 485};
 486
 487static int ipvlan_nl_changelink(struct net_device *dev,
 488                                struct nlattr *tb[], struct nlattr *data[],
 489                                struct netlink_ext_ack *extack)
 490{
 491        struct ipvl_dev *ipvlan = netdev_priv(dev);
 492        struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
 493        int err = 0;
 494
 495        if (!data)
 496                return 0;
 497
 498        if (data[IFLA_IPVLAN_MODE]) {
 499                u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
 500
 501                err = ipvlan_set_port_mode(port, nmode);
 502        }
 503
 504        if (!err && data[IFLA_IPVLAN_FLAGS]) {
 505                u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]);
 506
 507                if (flags & IPVLAN_F_PRIVATE)
 508                        ipvlan_mark_private(port);
 509                else
 510                        ipvlan_clear_private(port);
 511
 512                if (flags & IPVLAN_F_VEPA)
 513                        ipvlan_mark_vepa(port);
 514                else
 515                        ipvlan_clear_vepa(port);
 516        }
 517
 518        return err;
 519}
 520
 521static size_t ipvlan_nl_getsize(const struct net_device *dev)
 522{
 523        return (0
 524                + nla_total_size(2) /* IFLA_IPVLAN_MODE */
 525                + nla_total_size(2) /* IFLA_IPVLAN_FLAGS */
 526                );
 527}
 528
 529static int ipvlan_nl_validate(struct nlattr *tb[], struct nlattr *data[],
 530                              struct netlink_ext_ack *extack)
 531{
 532        if (!data)
 533                return 0;
 534
 535        if (data[IFLA_IPVLAN_MODE]) {
 536                u16 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
 537
 538                if (mode < IPVLAN_MODE_L2 || mode >= IPVLAN_MODE_MAX)
 539                        return -EINVAL;
 540        }
 541        if (data[IFLA_IPVLAN_FLAGS]) {
 542                u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]);
 543
 544                /* Only two bits are used at this moment. */
 545                if (flags & ~(IPVLAN_F_PRIVATE | IPVLAN_F_VEPA))
 546                        return -EINVAL;
 547                /* Also both flags can't be active at the same time. */
 548                if ((flags & (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) ==
 549                    (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA))
 550                        return -EINVAL;
 551        }
 552
 553        return 0;
 554}
 555
 556static int ipvlan_nl_fillinfo(struct sk_buff *skb,
 557                              const struct net_device *dev)
 558{
 559        struct ipvl_dev *ipvlan = netdev_priv(dev);
 560        struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
 561        int ret = -EINVAL;
 562
 563        if (!port)
 564                goto err;
 565
 566        ret = -EMSGSIZE;
 567        if (nla_put_u16(skb, IFLA_IPVLAN_MODE, port->mode))
 568                goto err;
 569        if (nla_put_u16(skb, IFLA_IPVLAN_FLAGS, port->flags))
 570                goto err;
 571
 572        return 0;
 573
 574err:
 575        return ret;
 576}
 577
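    /* Create a new ipvlan slave: resolve and validate the master device,
     * inherit its MAC address (and MTU unless one was requested), register
     * the netdevice (which creates the port on first use), pick a unique
     * non-zero dev_id from the port's IDA, link to the master as an upper
     * device, and finally apply the requested mode and flags.
     */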
 578int ipvlan_link_new(struct net *src_net, struct net_device *dev,
 579                    struct nlattr *tb[], struct nlattr *data[],
 580                    struct netlink_ext_ack *extack)
 581{
 582        struct ipvl_dev *ipvlan = netdev_priv(dev);
 583        struct ipvl_port *port;
 584        struct net_device *phy_dev;
 585        int err;
 586        u16 mode = IPVLAN_MODE_L3;
 587
 588        if (!tb[IFLA_LINK])
 589                return -EINVAL;
 590
 591        phy_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
 592        if (!phy_dev)
 593                return -ENODEV;
 594
 595        if (netif_is_ipvlan(phy_dev)) {
 596                struct ipvl_dev *tmp = netdev_priv(phy_dev);
 597
 598                phy_dev = tmp->phy_dev;
 599        } else if (!netif_is_ipvlan_port(phy_dev)) {
 600                /* Exit early if the underlying link is invalid or busy */
 601                if (phy_dev->type != ARPHRD_ETHER ||
 602                    phy_dev->flags & IFF_LOOPBACK) {
 603                        netdev_err(phy_dev,
 604                                   "Master is either lo or non-ether device\n");
 605                        return -EINVAL;
 606                }
 607
 608                if (netdev_is_rx_handler_busy(phy_dev)) {
 609                        netdev_err(phy_dev, "Device is already in use.\n");
 610                        return -EBUSY;
 611                }
 612        }
 613
 614        ipvlan->phy_dev = phy_dev;
 615        ipvlan->dev = dev;
 616        ipvlan->sfeatures = IPVLAN_FEATURES;
 617        if (!tb[IFLA_MTU])
 618                ipvlan_adjust_mtu(ipvlan, phy_dev);
 619        INIT_LIST_HEAD(&ipvlan->addrs);
 620        spin_lock_init(&ipvlan->addrs_lock);
 621
 622        /* TODO Probably put random address here to be presented to the
 623         * world but keep using the physical-dev address for the outgoing
 624         * packets.
 625         */
 626        memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN);
 627
 628        dev->priv_flags |= IFF_NO_RX_HANDLER;
 629
 630        err = register_netdevice(dev);
 631        if (err < 0)
 632                return err;
 633
 634        /* ipvlan_init() would have created the port, if required */
 635        port = ipvlan_port_get_rtnl(phy_dev);
 636        ipvlan->port = port;
 637
 638        /* If the port-id base has reached the MAX value, wrap it around and
 639         * begin from 0x1 again. This can happen on a busy system where lots
 640         * of slaves are being created and deleted.
 641         */
 642        if (port->dev_id_start == 0xFFFE)
 643                port->dev_id_start = 0x1;
 644
 645        /* Since the L2 address is shared among all IPvlan slaves including
 646         * the master, use unique 16 bit dev-ids to differentiate among them.
 647         * Assign IDs between 0x1 and 0xFFFE (used by the master) to each
 648         * slave link [see addrconf_ifid_eui48()].
 649         */
 650        err = ida_simple_get(&port->ida, port->dev_id_start, 0xFFFE,
 651                             GFP_KERNEL);
 652        if (err < 0)
 653                err = ida_simple_get(&port->ida, 0x1, port->dev_id_start,
 654                                     GFP_KERNEL);
 655        if (err < 0)
 656                goto unregister_netdev;
 657        dev->dev_id = err;
 658
 659        /* Increment the id-base to the next slot for future assignment */
 660        port->dev_id_start = err + 1;
 661
 662        err = netdev_upper_dev_link(phy_dev, dev, extack);
 663        if (err)
 664                goto remove_ida;
 665
 666        /* Flags are per port; the latest update overrides earlier ones. Set
 667         * them consistently, just like the mode attribute.
 668         */
 669        if (data && data[IFLA_IPVLAN_FLAGS])
 670                port->flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]);
 671
 672        if (data && data[IFLA_IPVLAN_MODE])
 673                mode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
 674
 675        err = ipvlan_set_port_mode(port, mode);
 676        if (err)
 677                goto unlink_netdev;
 678
 679        list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans);
 680        netif_stacked_transfer_operstate(phy_dev, dev);
 681        return 0;
 682
 683unlink_netdev:
 684        netdev_upper_dev_unlink(phy_dev, dev);
 685remove_ida:
 686        ida_simple_remove(&port->ida, dev->dev_id);
 687unregister_netdev:
 688        unregister_netdevice(dev);
 689        return err;
 690}
 691EXPORT_SYMBOL_GPL(ipvlan_link_new);
 692
 693void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
 694{
 695        struct ipvl_dev *ipvlan = netdev_priv(dev);
 696        struct ipvl_addr *addr, *next;
 697
 698        spin_lock_bh(&ipvlan->addrs_lock);
 699        list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
 700                ipvlan_ht_addr_del(addr);
 701                list_del_rcu(&addr->anode);
 702                kfree_rcu(addr, rcu);
 703        }
 704        spin_unlock_bh(&ipvlan->addrs_lock);
 705
 706        ida_simple_remove(&ipvlan->port->ida, dev->dev_id);
 707        list_del_rcu(&ipvlan->pnode);
 708        unregister_netdevice_queue(dev, head);
 709        netdev_upper_dev_unlink(ipvlan->phy_dev, dev);
 710}
 711EXPORT_SYMBOL_GPL(ipvlan_link_delete);
 712
 713void ipvlan_link_setup(struct net_device *dev)
 714{
 715        ether_setup(dev);
 716
 717        dev->max_mtu = ETH_MAX_MTU;
 718        dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 719        dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
 720        dev->netdev_ops = &ipvlan_netdev_ops;
 721        dev->needs_free_netdev = true;
 722        dev->header_ops = &ipvlan_header_ops;
 723        dev->ethtool_ops = &ipvlan_ethtool_ops;
 724}
 725EXPORT_SYMBOL_GPL(ipvlan_link_setup);
 726
 727static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] =
 728{
 729        [IFLA_IPVLAN_MODE] = { .type = NLA_U16 },
 730        [IFLA_IPVLAN_FLAGS] = { .type = NLA_U16 },
 731};
 732
 733static struct rtnl_link_ops ipvlan_link_ops = {
 734        .kind           = "ipvlan",
 735        .priv_size      = sizeof(struct ipvl_dev),
 736
 737        .setup          = ipvlan_link_setup,
 738        .newlink        = ipvlan_link_new,
 739        .dellink        = ipvlan_link_delete,
 740};
 741
 742int ipvlan_link_register(struct rtnl_link_ops *ops)
 743{
 744        ops->get_size   = ipvlan_nl_getsize;
 745        ops->policy     = ipvlan_nl_policy;
 746        ops->validate   = ipvlan_nl_validate;
 747        ops->fill_info  = ipvlan_nl_fillinfo;
 748        ops->changelink = ipvlan_nl_changelink;
 749        ops->maxtype    = IFLA_IPVLAN_MAX;
 750        return rtnl_link_register(ops);
 751}
 752EXPORT_SYMBOL_GPL(ipvlan_link_register);
 753
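    /* Notifier for events on the master device: propagate operstate,
     * feature, MTU and MAC changes to every slave, move the L3S hooks when
     * the master changes network namespace, and delete all slaves when the
     * master is unregistered.
     */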
 754static int ipvlan_device_event(struct notifier_block *unused,
 755                               unsigned long event, void *ptr)
 756{
 757        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 758        struct ipvl_dev *ipvlan, *next;
 759        struct ipvl_port *port;
 760        LIST_HEAD(lst_kill);
 761
 762        if (!netif_is_ipvlan_port(dev))
 763                return NOTIFY_DONE;
 764
 765        port = ipvlan_port_get_rtnl(dev);
 766
 767        switch (event) {
 768        case NETDEV_CHANGE:
 769                list_for_each_entry(ipvlan, &port->ipvlans, pnode)
 770                        netif_stacked_transfer_operstate(ipvlan->phy_dev,
 771                                                         ipvlan->dev);
 772                break;
 773
 774        case NETDEV_REGISTER: {
 775                struct net *oldnet, *newnet = dev_net(dev);
 776                struct ipvlan_netns *old_vnet;
 777
 778                oldnet = read_pnet(&port->pnet);
 779                if (net_eq(newnet, oldnet))
 780                        break;
 781
 782                write_pnet(&port->pnet, newnet);
 783
 784                old_vnet = net_generic(oldnet, ipvlan_netid);
 785                if (!old_vnet->ipvl_nf_hook_refcnt)
 786                        break;
 787
 788                ipvlan_register_nf_hook(newnet);
 789                ipvlan_unregister_nf_hook(oldnet);
 790                break;
 791        }
 792        case NETDEV_UNREGISTER:
 793                if (dev->reg_state != NETREG_UNREGISTERING)
 794                        break;
 795
 796                list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode)
 797                        ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
 798                                                            &lst_kill);
 799                unregister_netdevice_many(&lst_kill);
 800                break;
 801
 802        case NETDEV_FEAT_CHANGE:
 803                list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
 804                        ipvlan->dev->features = dev->features & IPVLAN_FEATURES;
 805                        ipvlan->dev->gso_max_size = dev->gso_max_size;
 806                        ipvlan->dev->gso_max_segs = dev->gso_max_segs;
 807                        netdev_features_change(ipvlan->dev);
 808                }
 809                break;
 810
 811        case NETDEV_CHANGEMTU:
 812                list_for_each_entry(ipvlan, &port->ipvlans, pnode)
 813                        ipvlan_adjust_mtu(ipvlan, dev);
 814                break;
 815
 816        case NETDEV_CHANGEADDR:
 817                list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
 818                        ether_addr_copy(ipvlan->dev->dev_addr, dev->dev_addr);
 819                        call_netdevice_notifiers(NETDEV_CHANGEADDR, ipvlan->dev);
 820                }
 821                break;
 822
 823        case NETDEV_PRE_TYPE_CHANGE:
 824                /* Forbid underlying device to change its type. */
 825                return NOTIFY_BAD;
 826        }
 827        return NOTIFY_DONE;
 828}
 829
 830/* The caller must hold the addrs lock. */
 831static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 832{
 833        struct ipvl_addr *addr;
 834
 835        addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC);
 836        if (!addr)
 837                return -ENOMEM;
 838
 839        addr->master = ipvlan;
 840        if (!is_v6) {
 841                memcpy(&addr->ip4addr, iaddr, sizeof(struct in_addr));
 842                addr->atype = IPVL_IPV4;
 843#if IS_ENABLED(CONFIG_IPV6)
 844        } else {
 845                memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr));
 846                addr->atype = IPVL_IPV6;
 847#endif
 848        }
 849
 850        list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
 851
 852        /* If the interface is not up, the address will be added to the hash
 853         * list by ipvlan_open.
 854         */
 855        if (netif_running(ipvlan->dev))
 856                ipvlan_ht_addr_add(ipvlan, addr);
 857
 858        return 0;
 859}
 860
 861static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 862{
 863        struct ipvl_addr *addr;
 864
 865        spin_lock_bh(&ipvlan->addrs_lock);
 866        addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
 867        if (!addr) {
 868                spin_unlock_bh(&ipvlan->addrs_lock);
 869                return;
 870        }
 871
 872        ipvlan_ht_addr_del(addr);
 873        list_del_rcu(&addr->anode);
 874        spin_unlock_bh(&ipvlan->addrs_lock);
 875        kfree_rcu(addr, rcu);
 876}
 877
 878static bool ipvlan_is_valid_dev(const struct net_device *dev)
 879{
 880        struct ipvl_dev *ipvlan = netdev_priv(dev);
 881
 882        if (!netif_is_ipvlan(dev))
 883                return false;
 884
 885        if (!ipvlan || !ipvlan->port)
 886                return false;
 887
 888        return true;
 889}
 890
 891#if IS_ENABLED(CONFIG_IPV6)
 892static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 893{
 894        int ret = -EINVAL;
 895
 896        spin_lock_bh(&ipvlan->addrs_lock);
 897        if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
 898                netif_err(ipvlan, ifup, ipvlan->dev,
 899                          "Failed to add IPv6=%pI6c addr for %s intf\n",
 900                          ip6_addr, ipvlan->dev->name);
 901        else
 902                ret = ipvlan_add_addr(ipvlan, ip6_addr, true);
 903        spin_unlock_bh(&ipvlan->addrs_lock);
 904        return ret;
 905}
 906
 907static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 908{
 909        return ipvlan_del_addr(ipvlan, ip6_addr, true);
 910}
 911
 912static int ipvlan_addr6_event(struct notifier_block *unused,
 913                              unsigned long event, void *ptr)
 914{
 915        struct inet6_ifaddr *if6 = (struct inet6_ifaddr *)ptr;
 916        struct net_device *dev = (struct net_device *)if6->idev->dev;
 917        struct ipvl_dev *ipvlan = netdev_priv(dev);
 918
 919        if (!ipvlan_is_valid_dev(dev))
 920                return NOTIFY_DONE;
 921
 922        switch (event) {
 923        case NETDEV_UP:
 924                if (ipvlan_add_addr6(ipvlan, &if6->addr))
 925                        return NOTIFY_BAD;
 926                break;
 927
 928        case NETDEV_DOWN:
 929                ipvlan_del_addr6(ipvlan, &if6->addr);
 930                break;
 931        }
 932
 933        return NOTIFY_OK;
 934}
 935
 936static int ipvlan_addr6_validator_event(struct notifier_block *unused,
 937                                        unsigned long event, void *ptr)
 938{
 939        struct in6_validator_info *i6vi = (struct in6_validator_info *)ptr;
 940        struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev;
 941        struct ipvl_dev *ipvlan = netdev_priv(dev);
 942
 943        if (!ipvlan_is_valid_dev(dev))
 944                return NOTIFY_DONE;
 945
 946        switch (event) {
 947        case NETDEV_UP:
 948                if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true)) {
 949                        NL_SET_ERR_MSG(i6vi->extack,
 950                                       "Address already assigned to an ipvlan device");
 951                        return notifier_from_errno(-EADDRINUSE);
 952                }
 953                break;
 954        }
 955
 956        return NOTIFY_OK;
 957}
 958#endif
 959
 960static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 961{
 962        int ret = -EINVAL;
 963
 964        spin_lock_bh(&ipvlan->addrs_lock);
 965        if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
 966                netif_err(ipvlan, ifup, ipvlan->dev,
 967                          "Failed to add IPv4=%pI4 on %s intf.\n",
 968                          ip4_addr, ipvlan->dev->name);
 969        else
 970                ret = ipvlan_add_addr(ipvlan, ip4_addr, false);
 971        spin_unlock_bh(&ipvlan->addrs_lock);
 972        return ret;
 973}
 974
 975static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 976{
 977        return ipvlan_del_addr(ipvlan, ip4_addr, false);
 978}
 979
 980static int ipvlan_addr4_event(struct notifier_block *unused,
 981                              unsigned long event, void *ptr)
 982{
 983        struct in_ifaddr *if4 = (struct in_ifaddr *)ptr;
 984        struct net_device *dev = (struct net_device *)if4->ifa_dev->dev;
 985        struct ipvl_dev *ipvlan = netdev_priv(dev);
 986        struct in_addr ip4_addr;
 987
 988        if (!ipvlan_is_valid_dev(dev))
 989                return NOTIFY_DONE;
 990
 991        switch (event) {
 992        case NETDEV_UP:
 993                ip4_addr.s_addr = if4->ifa_address;
 994                if (ipvlan_add_addr4(ipvlan, &ip4_addr))
 995                        return NOTIFY_BAD;
 996                break;
 997
 998        case NETDEV_DOWN:
 999                ip4_addr.s_addr = if4->ifa_address;
1000                ipvlan_del_addr4(ipvlan, &ip4_addr);
1001                break;
1002        }
1003
1004        return NOTIFY_OK;
1005}
1006
1007static int ipvlan_addr4_validator_event(struct notifier_block *unused,
1008                                        unsigned long event, void *ptr)
1009{
1010        struct in_validator_info *ivi = (struct in_validator_info *)ptr;
1011        struct net_device *dev = (struct net_device *)ivi->ivi_dev->dev;
1012        struct ipvl_dev *ipvlan = netdev_priv(dev);
1013
1014        if (!ipvlan_is_valid_dev(dev))
1015                return NOTIFY_DONE;
1016
1017        switch (event) {
1018        case NETDEV_UP:
1019                if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false)) {
1020                        NL_SET_ERR_MSG(ivi->extack,
1021                                       "Address already assigned to an ipvlan device");
1022                        return notifier_from_errno(-EADDRINUSE);
1023                }
1024                break;
1025        }
1026
1027        return NOTIFY_OK;
1028}
1029
1030static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = {
1031        .notifier_call = ipvlan_addr4_event,
1032};
1033
1034static struct notifier_block ipvlan_addr4_vtor_notifier_block __read_mostly = {
1035        .notifier_call = ipvlan_addr4_validator_event,
1036};
1037
1038static struct notifier_block ipvlan_notifier_block __read_mostly = {
1039        .notifier_call = ipvlan_device_event,
1040};
1041
1042#if IS_ENABLED(CONFIG_IPV6)
1043static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
1044        .notifier_call = ipvlan_addr6_event,
1045};
1046
1047static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = {
1048        .notifier_call = ipvlan_addr6_validator_event,
1049};
1050#endif
1051
1052static void ipvlan_ns_exit(struct net *net)
1053{
1054        struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
1055
1056        if (WARN_ON_ONCE(vnet->ipvl_nf_hook_refcnt)) {
1057                vnet->ipvl_nf_hook_refcnt = 0;
1058                nf_unregister_net_hooks(net, ipvl_nfops,
1059                                        ARRAY_SIZE(ipvl_nfops));
1060        }
1061}
1062
1063static struct pernet_operations ipvlan_net_ops = {
1064        .id = &ipvlan_netid,
1065        .size = sizeof(struct ipvlan_netns),
1066        .exit = ipvlan_ns_exit,
1067};
1068
1069static int __init ipvlan_init_module(void)
1070{
1071        int err;
1072
1073        ipvlan_init_secret();
1074        register_netdevice_notifier(&ipvlan_notifier_block);
1075#if IS_ENABLED(CONFIG_IPV6)
1076        register_inet6addr_notifier(&ipvlan_addr6_notifier_block);
1077        register_inet6addr_validator_notifier(
1078            &ipvlan_addr6_vtor_notifier_block);
1079#endif
1080        register_inetaddr_notifier(&ipvlan_addr4_notifier_block);
1081        register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block);
1082
1083        err = register_pernet_subsys(&ipvlan_net_ops);
1084        if (err < 0)
1085                goto error;
1086
1087        err = ipvlan_link_register(&ipvlan_link_ops);
1088        if (err < 0) {
1089                unregister_pernet_subsys(&ipvlan_net_ops);
1090                goto error;
1091        }
1092
1093        return 0;
1094error:
1095        unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
1096        unregister_inetaddr_validator_notifier(
1097            &ipvlan_addr4_vtor_notifier_block);
1098#if IS_ENABLED(CONFIG_IPV6)
1099        unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
1100        unregister_inet6addr_validator_notifier(
1101            &ipvlan_addr6_vtor_notifier_block);
1102#endif
1103        unregister_netdevice_notifier(&ipvlan_notifier_block);
1104        return err;
1105}
1106
1107static void __exit ipvlan_cleanup_module(void)
1108{
1109        rtnl_link_unregister(&ipvlan_link_ops);
1110        unregister_pernet_subsys(&ipvlan_net_ops);
1111        unregister_netdevice_notifier(&ipvlan_notifier_block);
1112        unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
1113        unregister_inetaddr_validator_notifier(
1114            &ipvlan_addr4_vtor_notifier_block);
1115#if IS_ENABLED(CONFIG_IPV6)
1116        unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
1117        unregister_inet6addr_validator_notifier(
1118            &ipvlan_addr6_vtor_notifier_block);
1119#endif
1120}
1121
1122module_init(ipvlan_init_module);
1123module_exit(ipvlan_cleanup_module);
1124
1125MODULE_LICENSE("GPL");
1126MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>");
1127MODULE_DESCRIPTION("Driver for L3 (IPv6/IPv4) based VLANs");
1128MODULE_ALIAS_RTNL_LINK("ipvlan");
1129