linux/drivers/net/ipvlan/ipvlan_main.c
<<
>>
Prefs
   1/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
   2 *
   3 * This program is free software; you can redistribute it and/or
   4 * modify it under the terms of the GNU General Public License as
   5 * published by the Free Software Foundation; either version 2 of
   6 * the License, or (at your option) any later version.
   7 *
   8 */
   9
  10#include "ipvlan.h"
  11
  12static unsigned int ipvlan_netid __read_mostly;
  13
  14struct ipvlan_netns {
  15        unsigned int ipvl_nf_hook_refcnt;
  16};
  17
  18static const struct nf_hook_ops ipvl_nfops[] = {
  19        {
  20                .hook     = ipvlan_nf_input,
  21                .pf       = NFPROTO_IPV4,
  22                .hooknum  = NF_INET_LOCAL_IN,
  23                .priority = INT_MAX,
  24        },
  25        {
  26                .hook     = ipvlan_nf_input,
  27                .pf       = NFPROTO_IPV6,
  28                .hooknum  = NF_INET_LOCAL_IN,
  29                .priority = INT_MAX,
  30        },
  31};
  32
  33static const struct l3mdev_ops ipvl_l3mdev_ops = {
  34        .l3mdev_l3_rcv = ipvlan_l3_rcv,
  35};
  36
  37static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
  38{
  39        ipvlan->dev->mtu = dev->mtu;
  40}
  41
  42static int ipvlan_register_nf_hook(struct net *net)
  43{
  44        struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
  45        int err = 0;
  46
  47        if (!vnet->ipvl_nf_hook_refcnt) {
  48                err = nf_register_net_hooks(net, ipvl_nfops,
  49                                            ARRAY_SIZE(ipvl_nfops));
  50                if (!err)
  51                        vnet->ipvl_nf_hook_refcnt = 1;
  52        } else {
  53                vnet->ipvl_nf_hook_refcnt++;
  54        }
  55
  56        return err;
  57}
  58
  59static void ipvlan_unregister_nf_hook(struct net *net)
  60{
  61        struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
  62
  63        if (WARN_ON(!vnet->ipvl_nf_hook_refcnt))
  64                return;
  65
  66        vnet->ipvl_nf_hook_refcnt--;
  67        if (!vnet->ipvl_nf_hook_refcnt)
  68                nf_unregister_net_hooks(net, ipvl_nfops,
  69                                        ARRAY_SIZE(ipvl_nfops));
  70}
  71
  72static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
  73{
  74        struct ipvl_dev *ipvlan;
  75        struct net_device *mdev = port->dev;
  76        int err = 0;
  77
  78        ASSERT_RTNL();
  79        if (port->mode != nval) {
  80                if (nval == IPVLAN_MODE_L3S) {
  81                        /* New mode is L3S */
  82                        err = ipvlan_register_nf_hook(read_pnet(&port->pnet));
  83                        if (!err) {
  84                                mdev->l3mdev_ops = &ipvl_l3mdev_ops;
  85                                mdev->priv_flags |= IFF_L3MDEV_MASTER;
  86                        } else
  87                                return err;
  88                } else if (port->mode == IPVLAN_MODE_L3S) {
  89                        /* Old mode was L3S */
  90                        mdev->priv_flags &= ~IFF_L3MDEV_MASTER;
  91                        ipvlan_unregister_nf_hook(read_pnet(&port->pnet));
  92                        mdev->l3mdev_ops = NULL;
  93                }
  94                list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
  95                        if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S)
  96                                ipvlan->dev->flags |= IFF_NOARP;
  97                        else
  98                                ipvlan->dev->flags &= ~IFF_NOARP;
  99                }
 100                port->mode = nval;
 101        }
 102        return err;
 103}
 104
 105static int ipvlan_port_create(struct net_device *dev)
 106{
 107        struct ipvl_port *port;
 108        int err, idx;
 109
 110        port = kzalloc(sizeof(struct ipvl_port), GFP_KERNEL);
 111        if (!port)
 112                return -ENOMEM;
 113
 114        write_pnet(&port->pnet, dev_net(dev));
 115        port->dev = dev;
 116        port->mode = IPVLAN_MODE_L3;
 117        INIT_LIST_HEAD(&port->ipvlans);
 118        for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++)
 119                INIT_HLIST_HEAD(&port->hlhead[idx]);
 120
 121        skb_queue_head_init(&port->backlog);
 122        INIT_WORK(&port->wq, ipvlan_process_multicast);
 123        ida_init(&port->ida);
 124        port->dev_id_start = 1;
 125
 126        err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port);
 127        if (err)
 128                goto err;
 129
 130        dev->priv_flags |= IFF_IPVLAN_MASTER;
 131        return 0;
 132
 133err:
 134        kfree(port);
 135        return err;
 136}
 137
 138static void ipvlan_port_destroy(struct net_device *dev)
 139{
 140        struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
 141        struct sk_buff *skb;
 142
 143        dev->priv_flags &= ~IFF_IPVLAN_MASTER;
 144        if (port->mode == IPVLAN_MODE_L3S) {
 145                dev->priv_flags &= ~IFF_L3MDEV_MASTER;
 146                ipvlan_unregister_nf_hook(dev_net(dev));
 147                dev->l3mdev_ops = NULL;
 148        }
 149        netdev_rx_handler_unregister(dev);
 150        cancel_work_sync(&port->wq);
 151        while ((skb = __skb_dequeue(&port->backlog)) != NULL) {
 152                if (skb->dev)
 153                        dev_put(skb->dev);
 154                kfree_skb(skb);
 155        }
 156        ida_destroy(&port->ida);
 157        kfree(port);
 158}
 159
 160#define IPVLAN_FEATURES \
 161        (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
 162         NETIF_F_GSO | NETIF_F_TSO | NETIF_F_GSO_ROBUST | \
 163         NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
 164         NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
 165
 166#define IPVLAN_STATE_MASK \
 167        ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))
 168
 169static int ipvlan_init(struct net_device *dev)
 170{
 171        struct ipvl_dev *ipvlan = netdev_priv(dev);
 172        struct net_device *phy_dev = ipvlan->phy_dev;
 173        struct ipvl_port *port;
 174        int err;
 175
 176        dev->state = (dev->state & ~IPVLAN_STATE_MASK) |
 177                     (phy_dev->state & IPVLAN_STATE_MASK);
 178        dev->features = phy_dev->features & IPVLAN_FEATURES;
 179        dev->features |= NETIF_F_LLTX;
 180        dev->gso_max_size = phy_dev->gso_max_size;
 181        dev->gso_max_segs = phy_dev->gso_max_segs;
 182        dev->hard_header_len = phy_dev->hard_header_len;
 183
 184        netdev_lockdep_set_classes(dev);
 185
 186        ipvlan->pcpu_stats = netdev_alloc_pcpu_stats(struct ipvl_pcpu_stats);
 187        if (!ipvlan->pcpu_stats)
 188                return -ENOMEM;
 189
 190        if (!netif_is_ipvlan_port(phy_dev)) {
 191                err = ipvlan_port_create(phy_dev);
 192                if (err < 0) {
 193                        free_percpu(ipvlan->pcpu_stats);
 194                        return err;
 195                }
 196        }
 197        port = ipvlan_port_get_rtnl(phy_dev);
 198        port->count += 1;
 199        return 0;
 200}
 201
 202static void ipvlan_uninit(struct net_device *dev)
 203{
 204        struct ipvl_dev *ipvlan = netdev_priv(dev);
 205        struct net_device *phy_dev = ipvlan->phy_dev;
 206        struct ipvl_port *port;
 207
 208        free_percpu(ipvlan->pcpu_stats);
 209
 210        port = ipvlan_port_get_rtnl(phy_dev);
 211        port->count -= 1;
 212        if (!port->count)
 213                ipvlan_port_destroy(port->dev);
 214}
 215
 216static int ipvlan_open(struct net_device *dev)
 217{
 218        struct ipvl_dev *ipvlan = netdev_priv(dev);
 219        struct net_device *phy_dev = ipvlan->phy_dev;
 220        struct ipvl_addr *addr;
 221
 222        if (ipvlan->port->mode == IPVLAN_MODE_L3 ||
 223            ipvlan->port->mode == IPVLAN_MODE_L3S)
 224                dev->flags |= IFF_NOARP;
 225        else
 226                dev->flags &= ~IFF_NOARP;
 227
 228        list_for_each_entry(addr, &ipvlan->addrs, anode)
 229                ipvlan_ht_addr_add(ipvlan, addr);
 230
 231        return dev_uc_add(phy_dev, phy_dev->dev_addr);
 232}
 233
 234static int ipvlan_stop(struct net_device *dev)
 235{
 236        struct ipvl_dev *ipvlan = netdev_priv(dev);
 237        struct net_device *phy_dev = ipvlan->phy_dev;
 238        struct ipvl_addr *addr;
 239
 240        dev_uc_unsync(phy_dev, dev);
 241        dev_mc_unsync(phy_dev, dev);
 242
 243        dev_uc_del(phy_dev, phy_dev->dev_addr);
 244
 245        list_for_each_entry(addr, &ipvlan->addrs, anode)
 246                ipvlan_ht_addr_del(addr);
 247
 248        return 0;
 249}
 250
 251static netdev_tx_t ipvlan_start_xmit(struct sk_buff *skb,
 252                                     struct net_device *dev)
 253{
 254        const struct ipvl_dev *ipvlan = netdev_priv(dev);
 255        int skblen = skb->len;
 256        int ret;
 257
 258        ret = ipvlan_queue_xmit(skb, dev);
 259        if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
 260                struct ipvl_pcpu_stats *pcptr;
 261
 262                pcptr = this_cpu_ptr(ipvlan->pcpu_stats);
 263
 264                u64_stats_update_begin(&pcptr->syncp);
 265                pcptr->tx_pkts++;
 266                pcptr->tx_bytes += skblen;
 267                u64_stats_update_end(&pcptr->syncp);
 268        } else {
 269                this_cpu_inc(ipvlan->pcpu_stats->tx_drps);
 270        }
 271        return ret;
 272}
 273
 274static netdev_features_t ipvlan_fix_features(struct net_device *dev,
 275                                             netdev_features_t features)
 276{
 277        struct ipvl_dev *ipvlan = netdev_priv(dev);
 278
 279        return features & (ipvlan->sfeatures | ~IPVLAN_FEATURES);
 280}
 281
 282static void ipvlan_change_rx_flags(struct net_device *dev, int change)
 283{
 284        struct ipvl_dev *ipvlan = netdev_priv(dev);
 285        struct net_device *phy_dev = ipvlan->phy_dev;
 286
 287        if (change & IFF_ALLMULTI)
 288                dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1);
 289}
 290
 291static void ipvlan_set_multicast_mac_filter(struct net_device *dev)
 292{
 293        struct ipvl_dev *ipvlan = netdev_priv(dev);
 294
 295        if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
 296                bitmap_fill(ipvlan->mac_filters, IPVLAN_MAC_FILTER_SIZE);
 297        } else {
 298                struct netdev_hw_addr *ha;
 299                DECLARE_BITMAP(mc_filters, IPVLAN_MAC_FILTER_SIZE);
 300
 301                bitmap_zero(mc_filters, IPVLAN_MAC_FILTER_SIZE);
 302                netdev_for_each_mc_addr(ha, dev)
 303                        __set_bit(ipvlan_mac_hash(ha->addr), mc_filters);
 304
 305                /* Turn-on broadcast bit irrespective of address family,
 306                 * since broadcast is deferred to a work-queue, hence no
 307                 * impact on fast-path processing.
 308                 */
 309                __set_bit(ipvlan_mac_hash(dev->broadcast), mc_filters);
 310
 311                bitmap_copy(ipvlan->mac_filters, mc_filters,
 312                            IPVLAN_MAC_FILTER_SIZE);
 313        }
 314        dev_uc_sync(ipvlan->phy_dev, dev);
 315        dev_mc_sync(ipvlan->phy_dev, dev);
 316}
 317
 318static void ipvlan_get_stats64(struct net_device *dev,
 319                               struct rtnl_link_stats64 *s)
 320{
 321        struct ipvl_dev *ipvlan = netdev_priv(dev);
 322
 323        if (ipvlan->pcpu_stats) {
 324                struct ipvl_pcpu_stats *pcptr;
 325                u64 rx_pkts, rx_bytes, rx_mcast, tx_pkts, tx_bytes;
 326                u32 rx_errs = 0, tx_drps = 0;
 327                u32 strt;
 328                int idx;
 329
 330                for_each_possible_cpu(idx) {
 331                        pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx);
 332                        do {
 333                                strt= u64_stats_fetch_begin_irq(&pcptr->syncp);
 334                                rx_pkts = pcptr->rx_pkts;
 335                                rx_bytes = pcptr->rx_bytes;
 336                                rx_mcast = pcptr->rx_mcast;
 337                                tx_pkts = pcptr->tx_pkts;
 338                                tx_bytes = pcptr->tx_bytes;
 339                        } while (u64_stats_fetch_retry_irq(&pcptr->syncp,
 340                                                           strt));
 341
 342                        s->rx_packets += rx_pkts;
 343                        s->rx_bytes += rx_bytes;
 344                        s->multicast += rx_mcast;
 345                        s->tx_packets += tx_pkts;
 346                        s->tx_bytes += tx_bytes;
 347
 348                        /* u32 values are updated without syncp protection. */
 349                        rx_errs += pcptr->rx_errs;
 350                        tx_drps += pcptr->tx_drps;
 351                }
 352                s->rx_errors = rx_errs;
 353                s->rx_dropped = rx_errs;
 354                s->tx_dropped = tx_drps;
 355        }
 356}
 357
 358static int ipvlan_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 359{
 360        struct ipvl_dev *ipvlan = netdev_priv(dev);
 361        struct net_device *phy_dev = ipvlan->phy_dev;
 362
 363        return vlan_vid_add(phy_dev, proto, vid);
 364}
 365
 366static int ipvlan_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
 367                                   u16 vid)
 368{
 369        struct ipvl_dev *ipvlan = netdev_priv(dev);
 370        struct net_device *phy_dev = ipvlan->phy_dev;
 371
 372        vlan_vid_del(phy_dev, proto, vid);
 373        return 0;
 374}
 375
 376static int ipvlan_get_iflink(const struct net_device *dev)
 377{
 378        struct ipvl_dev *ipvlan = netdev_priv(dev);
 379
 380        return ipvlan->phy_dev->ifindex;
 381}
 382
 383static const struct net_device_ops ipvlan_netdev_ops = {
 384        .ndo_init               = ipvlan_init,
 385        .ndo_uninit             = ipvlan_uninit,
 386        .ndo_open               = ipvlan_open,
 387        .ndo_stop               = ipvlan_stop,
 388        .ndo_start_xmit         = ipvlan_start_xmit,
 389        .ndo_fix_features       = ipvlan_fix_features,
 390        .ndo_change_rx_flags    = ipvlan_change_rx_flags,
 391        .ndo_set_rx_mode        = ipvlan_set_multicast_mac_filter,
 392        .ndo_get_stats64        = ipvlan_get_stats64,
 393        .ndo_vlan_rx_add_vid    = ipvlan_vlan_rx_add_vid,
 394        .ndo_vlan_rx_kill_vid   = ipvlan_vlan_rx_kill_vid,
 395        .ndo_get_iflink         = ipvlan_get_iflink,
 396};
 397
 398static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev,
 399                              unsigned short type, const void *daddr,
 400                              const void *saddr, unsigned len)
 401{
 402        const struct ipvl_dev *ipvlan = netdev_priv(dev);
 403        struct net_device *phy_dev = ipvlan->phy_dev;
 404
 405        /* TODO Probably use a different field than dev_addr so that the
 406         * mac-address on the virtual device is portable and can be carried
 407         * while the packets use the mac-addr on the physical device.
 408         */
 409        return dev_hard_header(skb, phy_dev, type, daddr,
 410                               saddr ? : phy_dev->dev_addr, len);
 411}
 412
 413static const struct header_ops ipvlan_header_ops = {
 414        .create         = ipvlan_hard_header,
 415        .parse          = eth_header_parse,
 416        .cache          = eth_header_cache,
 417        .cache_update   = eth_header_cache_update,
 418};
 419
 420static int ipvlan_ethtool_get_link_ksettings(struct net_device *dev,
 421                                             struct ethtool_link_ksettings *cmd)
 422{
 423        const struct ipvl_dev *ipvlan = netdev_priv(dev);
 424
 425        return __ethtool_get_link_ksettings(ipvlan->phy_dev, cmd);
 426}
 427
 428static void ipvlan_ethtool_get_drvinfo(struct net_device *dev,
 429                                       struct ethtool_drvinfo *drvinfo)
 430{
 431        strlcpy(drvinfo->driver, IPVLAN_DRV, sizeof(drvinfo->driver));
 432        strlcpy(drvinfo->version, IPV_DRV_VER, sizeof(drvinfo->version));
 433}
 434
 435static u32 ipvlan_ethtool_get_msglevel(struct net_device *dev)
 436{
 437        const struct ipvl_dev *ipvlan = netdev_priv(dev);
 438
 439        return ipvlan->msg_enable;
 440}
 441
 442static void ipvlan_ethtool_set_msglevel(struct net_device *dev, u32 value)
 443{
 444        struct ipvl_dev *ipvlan = netdev_priv(dev);
 445
 446        ipvlan->msg_enable = value;
 447}
 448
 449static const struct ethtool_ops ipvlan_ethtool_ops = {
 450        .get_link       = ethtool_op_get_link,
 451        .get_link_ksettings     = ipvlan_ethtool_get_link_ksettings,
 452        .get_drvinfo    = ipvlan_ethtool_get_drvinfo,
 453        .get_msglevel   = ipvlan_ethtool_get_msglevel,
 454        .set_msglevel   = ipvlan_ethtool_set_msglevel,
 455};
 456
 457static int ipvlan_nl_changelink(struct net_device *dev,
 458                                struct nlattr *tb[], struct nlattr *data[],
 459                                struct netlink_ext_ack *extack)
 460{
 461        struct ipvl_dev *ipvlan = netdev_priv(dev);
 462        struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
 463        int err = 0;
 464
 465        if (!data)
 466                return 0;
 467
 468        if (data[IFLA_IPVLAN_MODE]) {
 469                u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
 470
 471                err = ipvlan_set_port_mode(port, nmode);
 472        }
 473
 474        if (!err && data[IFLA_IPVLAN_FLAGS]) {
 475                u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]);
 476
 477                if (flags & IPVLAN_F_PRIVATE)
 478                        ipvlan_mark_private(port);
 479                else
 480                        ipvlan_clear_private(port);
 481
 482                if (flags & IPVLAN_F_VEPA)
 483                        ipvlan_mark_vepa(port);
 484                else
 485                        ipvlan_clear_vepa(port);
 486        }
 487
 488        return err;
 489}
 490
 491static size_t ipvlan_nl_getsize(const struct net_device *dev)
 492{
 493        return (0
 494                + nla_total_size(2) /* IFLA_IPVLAN_MODE */
 495                + nla_total_size(2) /* IFLA_IPVLAN_FLAGS */
 496                );
 497}
 498
 499static int ipvlan_nl_validate(struct nlattr *tb[], struct nlattr *data[],
 500                              struct netlink_ext_ack *extack)
 501{
 502        if (!data)
 503                return 0;
 504
 505        if (data[IFLA_IPVLAN_MODE]) {
 506                u16 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
 507
 508                if (mode < IPVLAN_MODE_L2 || mode >= IPVLAN_MODE_MAX)
 509                        return -EINVAL;
 510        }
 511        if (data[IFLA_IPVLAN_FLAGS]) {
 512                u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]);
 513
 514                /* Only two bits are used at this moment. */
 515                if (flags & ~(IPVLAN_F_PRIVATE | IPVLAN_F_VEPA))
 516                        return -EINVAL;
 517                /* Also both flags can't be active at the same time. */
 518                if ((flags & (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) ==
 519                    (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA))
 520                        return -EINVAL;
 521        }
 522
 523        return 0;
 524}
 525
 526static int ipvlan_nl_fillinfo(struct sk_buff *skb,
 527                              const struct net_device *dev)
 528{
 529        struct ipvl_dev *ipvlan = netdev_priv(dev);
 530        struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
 531        int ret = -EINVAL;
 532
 533        if (!port)
 534                goto err;
 535
 536        ret = -EMSGSIZE;
 537        if (nla_put_u16(skb, IFLA_IPVLAN_MODE, port->mode))
 538                goto err;
 539        if (nla_put_u16(skb, IFLA_IPVLAN_FLAGS, port->flags))
 540                goto err;
 541
 542        return 0;
 543
 544err:
 545        return ret;
 546}
 547
 548int ipvlan_link_new(struct net *src_net, struct net_device *dev,
 549                    struct nlattr *tb[], struct nlattr *data[],
 550                    struct netlink_ext_ack *extack)
 551{
 552        struct ipvl_dev *ipvlan = netdev_priv(dev);
 553        struct ipvl_port *port;
 554        struct net_device *phy_dev;
 555        int err;
 556        u16 mode = IPVLAN_MODE_L3;
 557
 558        if (!tb[IFLA_LINK])
 559                return -EINVAL;
 560
 561        phy_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
 562        if (!phy_dev)
 563                return -ENODEV;
 564
 565        if (netif_is_ipvlan(phy_dev)) {
 566                struct ipvl_dev *tmp = netdev_priv(phy_dev);
 567
 568                phy_dev = tmp->phy_dev;
 569        } else if (!netif_is_ipvlan_port(phy_dev)) {
 570                /* Exit early if the underlying link is invalid or busy */
 571                if (phy_dev->type != ARPHRD_ETHER ||
 572                    phy_dev->flags & IFF_LOOPBACK) {
 573                        netdev_err(phy_dev,
 574                                   "Master is either lo or non-ether device\n");
 575                        return -EINVAL;
 576                }
 577
 578                if (netdev_is_rx_handler_busy(phy_dev)) {
 579                        netdev_err(phy_dev, "Device is already in use.\n");
 580                        return -EBUSY;
 581                }
 582        }
 583
 584        ipvlan->phy_dev = phy_dev;
 585        ipvlan->dev = dev;
 586        ipvlan->sfeatures = IPVLAN_FEATURES;
 587        ipvlan_adjust_mtu(ipvlan, phy_dev);
 588        INIT_LIST_HEAD(&ipvlan->addrs);
 589
 590        /* TODO Probably put random address here to be presented to the
 591         * world but keep using the physical-dev address for the outgoing
 592         * packets.
 593         */
 594        memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN);
 595
 596        dev->priv_flags |= IFF_IPVLAN_SLAVE;
 597
 598        err = register_netdevice(dev);
 599        if (err < 0)
 600                return err;
 601
 602        /* ipvlan_init() would have created the port, if required */
 603        port = ipvlan_port_get_rtnl(phy_dev);
 604        ipvlan->port = port;
 605
 606        /* If the port-id base is at the MAX value, then wrap it around and
 607         * begin from 0x1 again. This may be due to a busy system where lots
 608         * of slaves are getting created and deleted.
 609         */
 610        if (port->dev_id_start == 0xFFFE)
 611                port->dev_id_start = 0x1;
 612
 613        /* Since L2 address is shared among all IPvlan slaves including
 614         * master, use unique 16 bit dev-ids to diffentiate among them.
 615         * Assign IDs between 0x1 and 0xFFFE (used by the master) to each
 616         * slave link [see addrconf_ifid_eui48()].
 617         */
 618        err = ida_simple_get(&port->ida, port->dev_id_start, 0xFFFE,
 619                             GFP_KERNEL);
 620        if (err < 0)
 621                err = ida_simple_get(&port->ida, 0x1, port->dev_id_start,
 622                                     GFP_KERNEL);
 623        if (err < 0)
 624                goto unregister_netdev;
 625        dev->dev_id = err;
 626
 627        /* Increment id-base to the next slot for the future assignment */
 628        port->dev_id_start = err + 1;
 629
 630        err = netdev_upper_dev_link(phy_dev, dev, extack);
 631        if (err)
 632                goto remove_ida;
 633
 634        /* Flags are per port and latest update overrides. User has
 635         * to be consistent in setting it just like the mode attribute.
 636         */
 637        if (data && data[IFLA_IPVLAN_FLAGS])
 638                port->flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]);
 639
 640        if (data && data[IFLA_IPVLAN_MODE])
 641                mode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
 642
 643        err = ipvlan_set_port_mode(port, mode);
 644        if (err)
 645                goto unlink_netdev;
 646
 647        list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans);
 648        netif_stacked_transfer_operstate(phy_dev, dev);
 649        return 0;
 650
 651unlink_netdev:
 652        netdev_upper_dev_unlink(phy_dev, dev);
 653remove_ida:
 654        ida_simple_remove(&port->ida, dev->dev_id);
 655unregister_netdev:
 656        unregister_netdevice(dev);
 657        return err;
 658}
 659EXPORT_SYMBOL_GPL(ipvlan_link_new);
 660
 661void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
 662{
 663        struct ipvl_dev *ipvlan = netdev_priv(dev);
 664        struct ipvl_addr *addr, *next;
 665
 666        list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
 667                ipvlan_ht_addr_del(addr);
 668                list_del(&addr->anode);
 669                kfree_rcu(addr, rcu);
 670        }
 671
 672        ida_simple_remove(&ipvlan->port->ida, dev->dev_id);
 673        list_del_rcu(&ipvlan->pnode);
 674        unregister_netdevice_queue(dev, head);
 675        netdev_upper_dev_unlink(ipvlan->phy_dev, dev);
 676}
 677EXPORT_SYMBOL_GPL(ipvlan_link_delete);
 678
 679void ipvlan_link_setup(struct net_device *dev)
 680{
 681        ether_setup(dev);
 682
 683        dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 684        dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
 685        dev->netdev_ops = &ipvlan_netdev_ops;
 686        dev->needs_free_netdev = true;
 687        dev->header_ops = &ipvlan_header_ops;
 688        dev->ethtool_ops = &ipvlan_ethtool_ops;
 689}
 690EXPORT_SYMBOL_GPL(ipvlan_link_setup);
 691
 692static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] =
 693{
 694        [IFLA_IPVLAN_MODE] = { .type = NLA_U16 },
 695        [IFLA_IPVLAN_FLAGS] = { .type = NLA_U16 },
 696};
 697
 698static struct rtnl_link_ops ipvlan_link_ops = {
 699        .kind           = "ipvlan",
 700        .priv_size      = sizeof(struct ipvl_dev),
 701
 702        .setup          = ipvlan_link_setup,
 703        .newlink        = ipvlan_link_new,
 704        .dellink        = ipvlan_link_delete,
 705};
 706
 707int ipvlan_link_register(struct rtnl_link_ops *ops)
 708{
 709        ops->get_size   = ipvlan_nl_getsize;
 710        ops->policy     = ipvlan_nl_policy;
 711        ops->validate   = ipvlan_nl_validate;
 712        ops->fill_info  = ipvlan_nl_fillinfo;
 713        ops->changelink = ipvlan_nl_changelink;
 714        ops->maxtype    = IFLA_IPVLAN_MAX;
 715        return rtnl_link_register(ops);
 716}
 717EXPORT_SYMBOL_GPL(ipvlan_link_register);
 718
 719static int ipvlan_device_event(struct notifier_block *unused,
 720                               unsigned long event, void *ptr)
 721{
 722        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 723        struct ipvl_dev *ipvlan, *next;
 724        struct ipvl_port *port;
 725        LIST_HEAD(lst_kill);
 726
 727        if (!netif_is_ipvlan_port(dev))
 728                return NOTIFY_DONE;
 729
 730        port = ipvlan_port_get_rtnl(dev);
 731
 732        switch (event) {
 733        case NETDEV_CHANGE:
 734                list_for_each_entry(ipvlan, &port->ipvlans, pnode)
 735                        netif_stacked_transfer_operstate(ipvlan->phy_dev,
 736                                                         ipvlan->dev);
 737                break;
 738
 739        case NETDEV_REGISTER: {
 740                struct net *oldnet, *newnet = dev_net(dev);
 741                struct ipvlan_netns *old_vnet;
 742
 743                oldnet = read_pnet(&port->pnet);
 744                if (net_eq(newnet, oldnet))
 745                        break;
 746
 747                write_pnet(&port->pnet, newnet);
 748
 749                old_vnet = net_generic(oldnet, ipvlan_netid);
 750                if (!old_vnet->ipvl_nf_hook_refcnt)
 751                        break;
 752
 753                ipvlan_register_nf_hook(newnet);
 754                ipvlan_unregister_nf_hook(oldnet);
 755                break;
 756        }
 757        case NETDEV_UNREGISTER:
 758                if (dev->reg_state != NETREG_UNREGISTERING)
 759                        break;
 760
 761                list_for_each_entry_safe(ipvlan, next, &port->ipvlans,
 762                                         pnode)
 763                        ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
 764                                                            &lst_kill);
 765                unregister_netdevice_many(&lst_kill);
 766                break;
 767
 768        case NETDEV_FEAT_CHANGE:
 769                list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
 770                        ipvlan->dev->features = dev->features & IPVLAN_FEATURES;
 771                        ipvlan->dev->gso_max_size = dev->gso_max_size;
 772                        ipvlan->dev->gso_max_segs = dev->gso_max_segs;
 773                        netdev_features_change(ipvlan->dev);
 774                }
 775                break;
 776
 777        case NETDEV_CHANGEMTU:
 778                list_for_each_entry(ipvlan, &port->ipvlans, pnode)
 779                        ipvlan_adjust_mtu(ipvlan, dev);
 780                break;
 781
 782        case NETDEV_CHANGEADDR:
 783                list_for_each_entry(ipvlan, &port->ipvlans, pnode)
 784                        ether_addr_copy(ipvlan->dev->dev_addr, dev->dev_addr);
 785                break;
 786
 787        case NETDEV_PRE_TYPE_CHANGE:
 788                /* Forbid underlying device to change its type. */
 789                return NOTIFY_BAD;
 790        }
 791        return NOTIFY_DONE;
 792}
 793
 794static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 795{
 796        struct ipvl_addr *addr;
 797
 798        addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC);
 799        if (!addr)
 800                return -ENOMEM;
 801
 802        addr->master = ipvlan;
 803        if (is_v6) {
 804                memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr));
 805                addr->atype = IPVL_IPV6;
 806        } else {
 807                memcpy(&addr->ip4addr, iaddr, sizeof(struct in_addr));
 808                addr->atype = IPVL_IPV4;
 809        }
 810        list_add_tail(&addr->anode, &ipvlan->addrs);
 811
 812        /* If the interface is not up, the address will be added to the hash
 813         * list by ipvlan_open.
 814         */
 815        if (netif_running(ipvlan->dev))
 816                ipvlan_ht_addr_add(ipvlan, addr);
 817
 818        return 0;
 819}
 820
 821static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 822{
 823        struct ipvl_addr *addr;
 824
 825        addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
 826        if (!addr)
 827                return;
 828
 829        ipvlan_ht_addr_del(addr);
 830        list_del(&addr->anode);
 831        kfree_rcu(addr, rcu);
 832
 833        return;
 834}
 835
 836static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 837{
 838        if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
 839                netif_err(ipvlan, ifup, ipvlan->dev,
 840                          "Failed to add IPv6=%pI6c addr for %s intf\n",
 841                          ip6_addr, ipvlan->dev->name);
 842                return -EINVAL;
 843        }
 844
 845        return ipvlan_add_addr(ipvlan, ip6_addr, true);
 846}
 847
 848static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 849{
 850        return ipvlan_del_addr(ipvlan, ip6_addr, true);
 851}
 852
 853static bool ipvlan_is_valid_dev(const struct net_device *dev)
 854{
 855        struct ipvl_dev *ipvlan = netdev_priv(dev);
 856
 857        if (!netif_is_ipvlan(dev))
 858                return false;
 859
 860        if (!ipvlan || !ipvlan->port)
 861                return false;
 862
 863        return true;
 864}
 865
 866static int ipvlan_addr6_event(struct notifier_block *unused,
 867                              unsigned long event, void *ptr)
 868{
 869        struct inet6_ifaddr *if6 = (struct inet6_ifaddr *)ptr;
 870        struct net_device *dev = (struct net_device *)if6->idev->dev;
 871        struct ipvl_dev *ipvlan = netdev_priv(dev);
 872
 873        if (!ipvlan_is_valid_dev(dev))
 874                return NOTIFY_DONE;
 875
 876        switch (event) {
 877        case NETDEV_UP:
 878                if (ipvlan_add_addr6(ipvlan, &if6->addr))
 879                        return NOTIFY_BAD;
 880                break;
 881
 882        case NETDEV_DOWN:
 883                ipvlan_del_addr6(ipvlan, &if6->addr);
 884                break;
 885        }
 886
 887        return NOTIFY_OK;
 888}
 889
 890static int ipvlan_addr6_validator_event(struct notifier_block *unused,
 891                                        unsigned long event, void *ptr)
 892{
 893        struct in6_validator_info *i6vi = (struct in6_validator_info *)ptr;
 894        struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev;
 895        struct ipvl_dev *ipvlan = netdev_priv(dev);
 896
 897        /* FIXME IPv6 autoconf calls us from bh without RTNL */
 898        if (in_softirq())
 899                return NOTIFY_DONE;
 900
 901        if (!ipvlan_is_valid_dev(dev))
 902                return NOTIFY_DONE;
 903
 904        switch (event) {
 905        case NETDEV_UP:
 906                if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true)) {
 907                        NL_SET_ERR_MSG(i6vi->extack,
 908                                       "Address already assigned to an ipvlan device");
 909                        return notifier_from_errno(-EADDRINUSE);
 910                }
 911                break;
 912        }
 913
 914        return NOTIFY_OK;
 915}
 916
 917static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 918{
 919        if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
 920                netif_err(ipvlan, ifup, ipvlan->dev,
 921                          "Failed to add IPv4=%pI4 on %s intf.\n",
 922                          ip4_addr, ipvlan->dev->name);
 923                return -EINVAL;
 924        }
 925
 926        return ipvlan_add_addr(ipvlan, ip4_addr, false);
 927}
 928
 929static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 930{
 931        return ipvlan_del_addr(ipvlan, ip4_addr, false);
 932}
 933
 934static int ipvlan_addr4_event(struct notifier_block *unused,
 935                              unsigned long event, void *ptr)
 936{
 937        struct in_ifaddr *if4 = (struct in_ifaddr *)ptr;
 938        struct net_device *dev = (struct net_device *)if4->ifa_dev->dev;
 939        struct ipvl_dev *ipvlan = netdev_priv(dev);
 940        struct in_addr ip4_addr;
 941
 942        if (!ipvlan_is_valid_dev(dev))
 943                return NOTIFY_DONE;
 944
 945        switch (event) {
 946        case NETDEV_UP:
 947                ip4_addr.s_addr = if4->ifa_address;
 948                if (ipvlan_add_addr4(ipvlan, &ip4_addr))
 949                        return NOTIFY_BAD;
 950                break;
 951
 952        case NETDEV_DOWN:
 953                ip4_addr.s_addr = if4->ifa_address;
 954                ipvlan_del_addr4(ipvlan, &ip4_addr);
 955                break;
 956        }
 957
 958        return NOTIFY_OK;
 959}
 960
 961static int ipvlan_addr4_validator_event(struct notifier_block *unused,
 962                                        unsigned long event, void *ptr)
 963{
 964        struct in_validator_info *ivi = (struct in_validator_info *)ptr;
 965        struct net_device *dev = (struct net_device *)ivi->ivi_dev->dev;
 966        struct ipvl_dev *ipvlan = netdev_priv(dev);
 967
 968        if (!ipvlan_is_valid_dev(dev))
 969                return NOTIFY_DONE;
 970
 971        switch (event) {
 972        case NETDEV_UP:
 973                if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false)) {
 974                        NL_SET_ERR_MSG(ivi->extack,
 975                                       "Address already assigned to an ipvlan device");
 976                        return notifier_from_errno(-EADDRINUSE);
 977                }
 978                break;
 979        }
 980
 981        return NOTIFY_OK;
 982}
 983
 984static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = {
 985        .notifier_call = ipvlan_addr4_event,
 986};
 987
 988static struct notifier_block ipvlan_addr4_vtor_notifier_block __read_mostly = {
 989        .notifier_call = ipvlan_addr4_validator_event,
 990};
 991
 992static struct notifier_block ipvlan_notifier_block __read_mostly = {
 993        .notifier_call = ipvlan_device_event,
 994};
 995
 996static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
 997        .notifier_call = ipvlan_addr6_event,
 998};
 999
1000static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = {
1001        .notifier_call = ipvlan_addr6_validator_event,
1002};
1003
1004static void ipvlan_ns_exit(struct net *net)
1005{
1006        struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
1007
1008        if (WARN_ON_ONCE(vnet->ipvl_nf_hook_refcnt)) {
1009                vnet->ipvl_nf_hook_refcnt = 0;
1010                nf_unregister_net_hooks(net, ipvl_nfops,
1011                                        ARRAY_SIZE(ipvl_nfops));
1012        }
1013}
1014
1015static struct pernet_operations ipvlan_net_ops = {
1016        .id = &ipvlan_netid,
1017        .size = sizeof(struct ipvlan_netns),
1018        .exit = ipvlan_ns_exit,
1019};
1020
1021static int __init ipvlan_init_module(void)
1022{
1023        int err;
1024
1025        ipvlan_init_secret();
1026        register_netdevice_notifier(&ipvlan_notifier_block);
1027        register_inet6addr_notifier(&ipvlan_addr6_notifier_block);
1028        register_inet6addr_validator_notifier(
1029            &ipvlan_addr6_vtor_notifier_block);
1030        register_inetaddr_notifier(&ipvlan_addr4_notifier_block);
1031        register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block);
1032
1033        err = register_pernet_subsys(&ipvlan_net_ops);
1034        if (err < 0)
1035                goto error;
1036
1037        err = ipvlan_link_register(&ipvlan_link_ops);
1038        if (err < 0) {
1039                unregister_pernet_subsys(&ipvlan_net_ops);
1040                goto error;
1041        }
1042
1043        return 0;
1044error:
1045        unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
1046        unregister_inetaddr_validator_notifier(
1047            &ipvlan_addr4_vtor_notifier_block);
1048        unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
1049        unregister_inet6addr_validator_notifier(
1050            &ipvlan_addr6_vtor_notifier_block);
1051        unregister_netdevice_notifier(&ipvlan_notifier_block);
1052        return err;
1053}
1054
1055static void __exit ipvlan_cleanup_module(void)
1056{
1057        rtnl_link_unregister(&ipvlan_link_ops);
1058        unregister_pernet_subsys(&ipvlan_net_ops);
1059        unregister_netdevice_notifier(&ipvlan_notifier_block);
1060        unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
1061        unregister_inetaddr_validator_notifier(
1062            &ipvlan_addr4_vtor_notifier_block);
1063        unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
1064        unregister_inet6addr_validator_notifier(
1065            &ipvlan_addr6_vtor_notifier_block);
1066}
1067
1068module_init(ipvlan_init_module);
1069module_exit(ipvlan_cleanup_module);
1070
1071MODULE_LICENSE("GPL");
1072MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>");
1073MODULE_DESCRIPTION("Driver for L3 (IPv6/IPv4) based VLANs");
1074MODULE_ALIAS_RTNL_LINK("ipvlan");
1075