linux/net/core/netpoll.c
/*
 * Common framework for low-level network console, dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/moduleparam.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
#include <linux/if_arp.h>
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/if_vlan.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ip6_checksum.h>
#include <asm/unaligned.h>
#include <trace/events/napi.h>

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32

static struct sk_buff_head skb_pool;

static atomic_t trapped;

DEFINE_STATIC_SRCU(netpoll_srcu);

#define USEC_PER_POLL   50
#define NETPOLL_RX_ENABLED  1
#define NETPOLL_RX_DROP     2

#define MAX_SKB_SIZE                                                    \
        (sizeof(struct ethhdr) +                                        \
         sizeof(struct iphdr) +                                         \
         sizeof(struct udphdr) +                                        \
         MAX_UDP_CHUNK)
static void zap_completion_queue(void);
static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
static void netpoll_async_cleanup(struct work_struct *work);

static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);

#define np_info(np, fmt, ...)                           \
        pr_info("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_err(np, fmt, ...)                            \
        pr_err("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_notice(np, fmt, ...)                         \
        pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)

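/*
 * Deferred transmit worker: drain the skbs that could not be sent
 * immediately from netpoll_send_skb_on_dev().  Transmission stops at
 * the first failure; the skb is requeued at the head and the work is
 * rescheduled, so message order is preserved.
 */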
static void queue_process(struct work_struct *work)
{
        struct netpoll_info *npinfo =
                container_of(work, struct netpoll_info, tx_work.work);
        struct sk_buff *skb;
        unsigned long flags;

        while ((skb = skb_dequeue(&npinfo->txq))) {
                struct net_device *dev = skb->dev;
                const struct net_device_ops *ops = dev->netdev_ops;
                struct netdev_queue *txq;

                if (!netif_device_present(dev) || !netif_running(dev)) {
                        __kfree_skb(skb);
                        continue;
                }

                txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

                local_irq_save(flags);
                __netif_tx_lock(txq, smp_processor_id());
                if (netif_xmit_frozen_or_stopped(txq) ||
                    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
                        skb_queue_head(&npinfo->txq, skb);
                        __netif_tx_unlock(txq);
                        local_irq_restore(flags);

                        schedule_delayed_work(&npinfo->tx_work, HZ/10);
                        return;
                }
                __netif_tx_unlock(txq);
                local_irq_restore(flags);
        }
}

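/*
 * Verify the UDP checksum of a received packet.  Returns 0 when the
 * checksum is absent, already verified, or verifies here; non-zero
 * otherwise.
 */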
static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
                            unsigned short ulen, __be32 saddr, __be32 daddr)
{
        __wsum psum;

        if (uh->check == 0 || skb_csum_unnecessary(skb))
                return 0;

        psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);

        if (skb->ip_summed == CHECKSUM_COMPLETE &&
            !csum_fold(csum_add(psum, skb->csum)))
                return 0;

        skb->csum = psum;

        return __skb_checksum_complete(skb);
}

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 *
 * Where there is bidirectional communication, reading only one
 * message at a time can lead to packets being dropped by the network
 * adapter, forcing superfluous retries and possibly timeouts.
 * Thus, we set our budget to greater than 1.
 */
static int poll_one_napi(struct netpoll_info *npinfo,
                         struct napi_struct *napi, int budget)
{
        int work;

        /* net_rx_action's ->poll() invocations and ours are
         * synchronized by this test which is only made while
         * holding the napi->poll_lock.
         */
        if (!test_bit(NAPI_STATE_SCHED, &napi->state))
                return budget;

        npinfo->rx_flags |= NETPOLL_RX_DROP;
        atomic_inc(&trapped);
        set_bit(NAPI_STATE_NPSVC, &napi->state);

        work = napi->poll(napi, budget);
        trace_napi_poll(napi);

        clear_bit(NAPI_STATE_NPSVC, &napi->state);
        atomic_dec(&trapped);
        npinfo->rx_flags &= ~NETPOLL_RX_DROP;

        return budget - work;
}

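/*
 * Poll every NAPI context registered on the device, spreading one
 * shared budget across them.  Contexts owned by the current CPU are
 * skipped to avoid recursing into ->poll().
 */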
static void poll_napi(struct net_device *dev)
{
        struct napi_struct *napi;
        int budget = 16;

        list_for_each_entry(napi, &dev->napi_list, dev_list) {
                if (napi->poll_owner != smp_processor_id() &&
                    spin_trylock(&napi->poll_lock)) {
                        budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
                                               napi, budget);
                        spin_unlock(&napi->poll_lock);

                        if (!budget)
                                break;
                }
        }
}

static void service_neigh_queue(struct netpoll_info *npi)
{
        if (npi) {
                struct sk_buff *skb;

                while ((skb = skb_dequeue(&npi->neigh_tx)))
                        netpoll_neigh_reply(skb, npi);
        }
}

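/*
 * Pump the device without relying on interrupts: run
 * ->ndo_poll_controller() and the NAPI handlers to reap tx completions
 * and trapped rx packets, then answer any queued ARP/ND requests.  For
 * a bonding slave, trapped neighbour traffic is handed up to the
 * master's queue first.
 */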
static void netpoll_poll_dev(struct net_device *dev)
{
        const struct net_device_ops *ops;
        struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);

        /* Don't do any rx activity if the dev_lock semaphore is held;
         * the dev_open/close paths use this to block netpoll activity
         * while changing device state.
         */
        if (down_trylock(&ni->dev_lock))
                return;

        if (!netif_running(dev)) {
                up(&ni->dev_lock);
                return;
        }

        ops = dev->netdev_ops;
        if (!ops->ndo_poll_controller) {
                up(&ni->dev_lock);
                return;
        }

        /* Process pending work on NIC */
        ops->ndo_poll_controller(dev);

        poll_napi(dev);

        up(&ni->dev_lock);

        if (dev->flags & IFF_SLAVE) {
                if (ni) {
                        struct net_device *bond_dev;
                        struct sk_buff *skb;
                        struct netpoll_info *bond_ni;

                        bond_dev = netdev_master_upper_dev_get_rcu(dev);
                        bond_ni = rcu_dereference_bh(bond_dev->npinfo);
                        while ((skb = skb_dequeue(&ni->neigh_tx))) {
                                skb->dev = bond_dev;
                                skb_queue_tail(&bond_ni->neigh_tx, skb);
                        }
                }
        }

        service_neigh_queue(ni);

        zap_completion_queue();
}

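/*
 * netpoll_rx_disable() and netpoll_rx_enable() bracket device state
 * changes (dev_open/dev_close): holding ni->dev_lock keeps
 * netpoll_poll_dev() out, and the SRCU read section pairs with the
 * synchronize_srcu() in __netpoll_cleanup().
 */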
int netpoll_rx_disable(struct net_device *dev)
{
        struct netpoll_info *ni;
        int idx;
        might_sleep();
        idx = srcu_read_lock(&netpoll_srcu);
        ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
        if (ni)
                down(&ni->dev_lock);
        srcu_read_unlock(&netpoll_srcu, idx);
        return 0;
}
EXPORT_SYMBOL(netpoll_rx_disable);

void netpoll_rx_enable(struct net_device *dev)
{
        struct netpoll_info *ni;
        rcu_read_lock();
        ni = rcu_dereference(dev->npinfo);
        if (ni)
                up(&ni->dev_lock);
        rcu_read_unlock();
}
EXPORT_SYMBOL(netpoll_rx_enable);

static void refill_skbs(void)
{
        struct sk_buff *skb;
        unsigned long flags;

        spin_lock_irqsave(&skb_pool.lock, flags);
        while (skb_pool.qlen < MAX_SKBS) {
                skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
                if (!skb)
                        break;

                __skb_queue_tail(&skb_pool, skb);
        }
        spin_unlock_irqrestore(&skb_pool.lock, flags);
}

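/*
 * Drain this CPU's tx completion queue so the memory can be reused
 * under OOM/irqs-off conditions: skbs with a destructor are put back
 * via dev_kfree_skb_any(), everything else is freed on the spot.
 */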
static void zap_completion_queue(void)
{
        unsigned long flags;
        struct softnet_data *sd = &get_cpu_var(softnet_data);

        if (sd->completion_queue) {
                struct sk_buff *clist;

                local_irq_save(flags);
                clist = sd->completion_queue;
                sd->completion_queue = NULL;
                local_irq_restore(flags);

                while (clist != NULL) {
                        struct sk_buff *skb = clist;
                        clist = clist->next;
                        if (skb->destructor) {
                                atomic_inc(&skb->users);
                                dev_kfree_skb_any(skb); /* put this one back */
                        } else {
                                __kfree_skb(skb);
                        }
                }
        }

        put_cpu_var(softnet_data);
}

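/*
 * Allocate an skb for a netpoll message: try a fresh allocation first,
 * fall back to the preallocated pool, and as a last resort poll the
 * device a few times in the hope that tx completions free some memory.
 */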
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
{
        int count = 0;
        struct sk_buff *skb;

        zap_completion_queue();
        refill_skbs();
repeat:

        skb = alloc_skb(len, GFP_ATOMIC);
        if (!skb)
                skb = skb_dequeue(&skb_pool);

        if (!skb) {
                if (++count < 10) {
                        netpoll_poll_dev(np->dev);
                        goto repeat;
                }
                return NULL;
        }

        atomic_set(&skb->users, 1);
        skb_reserve(skb, reserve);
        return skb;
}

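/*
 * Return 1 if the current CPU is already servicing one of the device's
 * NAPI contexts, in which case transmitting from here would recurse.
 */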
static int netpoll_owner_active(struct net_device *dev)
{
        struct napi_struct *napi;

        list_for_each_entry(napi, &dev->napi_list, dev_list) {
                if (napi->poll_owner == smp_processor_id())
                        return 1;
        }
        return 0;
}

/* call with IRQs disabled */
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
                             struct net_device *dev)
{
        int status = NETDEV_TX_BUSY;
        unsigned long tries;
        const struct net_device_ops *ops = dev->netdev_ops;
        /* It is up to the caller to keep npinfo alive. */
        struct netpoll_info *npinfo;

        WARN_ON_ONCE(!irqs_disabled());

        npinfo = rcu_dereference_bh(np->dev->npinfo);
        if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
                __kfree_skb(skb);
                return;
        }

        /* don't get messages out of order, and no recursion */
        if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
                struct netdev_queue *txq;

                txq = netdev_pick_tx(dev, skb);

                /* try until next clock tick */
                for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
                     tries > 0; --tries) {
                        if (__netif_tx_trylock(txq)) {
                                if (!netif_xmit_stopped(txq)) {
                                        if (vlan_tx_tag_present(skb) &&
                                            !vlan_hw_offload_capable(netif_skb_features(skb),
                                                                     skb->vlan_proto)) {
                                                skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
                                                if (unlikely(!skb))
                                                        break;
                                                skb->vlan_tci = 0;
                                        }

                                        status = ops->ndo_start_xmit(skb, dev);
                                        if (status == NETDEV_TX_OK)
                                                txq_trans_update(txq);
                                }
                                __netif_tx_unlock(txq);

                                if (status == NETDEV_TX_OK)
                                        break;

                        }

                        /* tickle the device; maybe there is some cleanup */
                        netpoll_poll_dev(np->dev);

                        udelay(USEC_PER_POLL);
                }

                WARN_ONCE(!irqs_disabled(),
                        "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
                        dev->name, ops->ndo_start_xmit);

        }

        if (status != NETDEV_TX_OK) {
                skb_queue_tail(&npinfo->txq, skb);
                schedule_delayed_work(&npinfo->tx_work, 0);
        }
}
EXPORT_SYMBOL(netpoll_send_skb_on_dev);

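/*
 * Build an Ethernet/IP(v6)/UDP frame around @msg and transmit it.  The
 * payload is copied first and each header is then pushed in front of
 * it, so no separate header buffer is needed.
 *
 * A minimal usage sketch (this is essentially how a netconsole-style
 * client emits a log line, assuming @np was configured earlier with
 * netpoll_setup()):
 *
 *	static void write_msg(struct netpoll *np, const char *msg)
 *	{
 *		netpoll_send_udp(np, msg, strlen(msg));
 *	}
 */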
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
        int total_len, ip_len, udp_len;
        struct sk_buff *skb;
        struct udphdr *udph;
        struct iphdr *iph;
        struct ethhdr *eth;
        static atomic_t ip_ident;
        struct ipv6hdr *ip6h;

        udp_len = len + sizeof(*udph);
        if (np->ipv6)
                ip_len = udp_len + sizeof(*ip6h);
        else
                ip_len = udp_len + sizeof(*iph);

        total_len = ip_len + LL_RESERVED_SPACE(np->dev);

        skb = find_skb(np, total_len + np->dev->needed_tailroom,
                       total_len - len);
        if (!skb)
                return;

        skb_copy_to_linear_data(skb, msg, len);
        skb_put(skb, len);

        skb_push(skb, sizeof(*udph));
        skb_reset_transport_header(skb);
        udph = udp_hdr(skb);
        udph->source = htons(np->local_port);
        udph->dest = htons(np->remote_port);
        udph->len = htons(udp_len);

        if (np->ipv6) {
                udph->check = 0;
                udph->check = csum_ipv6_magic(&np->local_ip.in6,
                                              &np->remote_ip.in6,
                                              udp_len, IPPROTO_UDP,
                                              csum_partial(udph, udp_len, 0));
                if (udph->check == 0)
                        udph->check = CSUM_MANGLED_0;

                skb_push(skb, sizeof(*ip6h));
                skb_reset_network_header(skb);
                ip6h = ipv6_hdr(skb);

                /* ip6h->version = 6; ip6h->priority = 0; */
                put_unaligned(0x60, (unsigned char *)ip6h);
                ip6h->flow_lbl[0] = 0;
                ip6h->flow_lbl[1] = 0;
                ip6h->flow_lbl[2] = 0;

                ip6h->payload_len = htons(sizeof(struct udphdr) + len);
                ip6h->nexthdr = IPPROTO_UDP;
                ip6h->hop_limit = 32;
                ip6h->saddr = np->local_ip.in6;
                ip6h->daddr = np->remote_ip.in6;

                eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
                skb_reset_mac_header(skb);
                skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
        } else {
                udph->check = 0;
                udph->check = csum_tcpudp_magic(np->local_ip.ip,
                                                np->remote_ip.ip,
                                                udp_len, IPPROTO_UDP,
                                                csum_partial(udph, udp_len, 0));
                if (udph->check == 0)
                        udph->check = CSUM_MANGLED_0;

                skb_push(skb, sizeof(*iph));
                skb_reset_network_header(skb);
                iph = ip_hdr(skb);

                /* iph->version = 4; iph->ihl = 5; */
                put_unaligned(0x45, (unsigned char *)iph);
                iph->tos      = 0;
                put_unaligned(htons(ip_len), &(iph->tot_len));
                iph->id       = htons(atomic_inc_return(&ip_ident));
                iph->frag_off = 0;
                iph->ttl      = 64;
                iph->protocol = IPPROTO_UDP;
                iph->check    = 0;
                put_unaligned(np->local_ip.ip, &(iph->saddr));
                put_unaligned(np->remote_ip.ip, &(iph->daddr));
                iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

                eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
                skb_reset_mac_header(skb);
                skb->protocol = eth->h_proto = htons(ETH_P_IP);
        }

        memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
        memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);

        skb->dev = np->dev;

        netpoll_send_skb(np, skb);
}
EXPORT_SYMBOL(netpoll_send_udp);

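/*
 * Answer trapped ARP requests (IPv4) and neighbour solicitations
 * (IPv6) addressed to the local netpoll addresses, so that the peer
 * can resolve us even while the normal stack is not running.
 */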
static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
{
        int size, type = ARPOP_REPLY;
        __be32 sip, tip;
        unsigned char *sha;
        struct sk_buff *send_skb;
        struct netpoll *np, *tmp;
        unsigned long flags;
        int hlen, tlen;
        int hits = 0, proto;

        if (list_empty(&npinfo->rx_np))
                return;

        /* Before checking the packet, we do some early
           inspection of whether this is interesting at all */
        spin_lock_irqsave(&npinfo->rx_lock, flags);
        list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
                if (np->dev == skb->dev)
                        hits++;
        }
        spin_unlock_irqrestore(&npinfo->rx_lock, flags);

        /* No netpoll struct is using this dev */
        if (!hits)
                return;

        proto = ntohs(eth_hdr(skb)->h_proto);
        if (proto == ETH_P_IP) {
                struct arphdr *arp;
                unsigned char *arp_ptr;
                /* No arp on this interface */
                if (skb->dev->flags & IFF_NOARP)
                        return;

                if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
                        return;

                skb_reset_network_header(skb);
                skb_reset_transport_header(skb);
                arp = arp_hdr(skb);

                if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
                     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
                    arp->ar_pro != htons(ETH_P_IP) ||
                    arp->ar_op != htons(ARPOP_REQUEST))
                        return;

                arp_ptr = (unsigned char *)(arp + 1);
                /* save the location of the src hw addr */
                sha = arp_ptr;
                arp_ptr += skb->dev->addr_len;
                memcpy(&sip, arp_ptr, 4);
                arp_ptr += 4;
                /* If we actually cared about dst hw addr,
                   it would get copied here */
                arp_ptr += skb->dev->addr_len;
                memcpy(&tip, arp_ptr, 4);

                /* Should we ignore arp? */
                if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
                        return;

                size = arp_hdr_len(skb->dev);

                spin_lock_irqsave(&npinfo->rx_lock, flags);
                list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
                        if (tip != np->local_ip.ip)
                                continue;

                        hlen = LL_RESERVED_SPACE(np->dev);
                        tlen = np->dev->needed_tailroom;
                        send_skb = find_skb(np, size + hlen + tlen, hlen);
                        if (!send_skb)
                                continue;

                        skb_reset_network_header(send_skb);
                        arp = (struct arphdr *) skb_put(send_skb, size);
                        send_skb->dev = skb->dev;
                        send_skb->protocol = htons(ETH_P_ARP);

                        /* Fill the device header for the ARP frame */
                        if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP,
                                            sha, np->dev->dev_addr,
                                            send_skb->len) < 0) {
                                kfree_skb(send_skb);
                                continue;
                        }

                        /*
                         * Fill out the arp protocol part.
                         *
                         * we only support ethernet device type,
                         * which (according to RFC 1390) should
                         * always equal 1 (Ethernet).
                         */

                        arp->ar_hrd = htons(np->dev->type);
                        arp->ar_pro = htons(ETH_P_IP);
                        arp->ar_hln = np->dev->addr_len;
                        arp->ar_pln = 4;
                        arp->ar_op = htons(type);

                        arp_ptr = (unsigned char *)(arp + 1);
                        memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
                        arp_ptr += np->dev->addr_len;
                        memcpy(arp_ptr, &tip, 4);
                        arp_ptr += 4;
                        memcpy(arp_ptr, sha, np->dev->addr_len);
                        arp_ptr += np->dev->addr_len;
                        memcpy(arp_ptr, &sip, 4);

                        netpoll_send_skb(np, send_skb);

                        /* If there are several rx_hooks for the same address,
                           we're fine sending a single reply */
                        break;
                }
                spin_unlock_irqrestore(&npinfo->rx_lock, flags);
        } else if (proto == ETH_P_IPV6) {
#if IS_ENABLED(CONFIG_IPV6)
                struct nd_msg *msg;
                u8 *lladdr = NULL;
                struct ipv6hdr *hdr;
                struct icmp6hdr *icmp6h;
                const struct in6_addr *saddr;
                const struct in6_addr *daddr;
                struct inet6_dev *in6_dev = NULL;
                struct in6_addr *target;

                in6_dev = in6_dev_get(skb->dev);
                if (!in6_dev || !in6_dev->cnf.accept_ra)
                        return;

                if (!pskb_may_pull(skb, skb->len))
                        return;

                msg = (struct nd_msg *)skb_transport_header(skb);

                __skb_push(skb, skb->data - skb_transport_header(skb));

                if (ipv6_hdr(skb)->hop_limit != 255)
                        return;
                if (msg->icmph.icmp6_code != 0)
                        return;
                if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
                        return;

                saddr = &ipv6_hdr(skb)->saddr;
                daddr = &ipv6_hdr(skb)->daddr;

                size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);

                spin_lock_irqsave(&npinfo->rx_lock, flags);
                list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
                        if (!ipv6_addr_equal(daddr, &np->local_ip.in6))
                                continue;

                        hlen = LL_RESERVED_SPACE(np->dev);
                        tlen = np->dev->needed_tailroom;
                        send_skb = find_skb(np, size + hlen + tlen, hlen);
                        if (!send_skb)
                                continue;

                        send_skb->protocol = htons(ETH_P_IPV6);
                        send_skb->dev = skb->dev;

                        skb_reset_network_header(send_skb);
                        skb_put(send_skb, sizeof(struct ipv6hdr));
                        hdr = ipv6_hdr(send_skb);

                        *(__be32 *)hdr = htonl(0x60000000);

                        hdr->payload_len = htons(size);
                        hdr->nexthdr = IPPROTO_ICMPV6;
                        hdr->hop_limit = 255;
                        hdr->saddr = *saddr;
                        hdr->daddr = *daddr;

                        send_skb->transport_header = send_skb->tail;
                        skb_put(send_skb, size);

                        icmp6h = (struct icmp6hdr *)skb_transport_header(send_skb);
                        icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
                        icmp6h->icmp6_router = 0;
                        icmp6h->icmp6_solicited = 1;
                        target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr));
                        *target = msg->target;
                        icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
                                                              IPPROTO_ICMPV6,
                                                              csum_partial(icmp6h,
                                                                           size, 0));

                        if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6,
                                            lladdr, np->dev->dev_addr,
                                            send_skb->len) < 0) {
                                kfree_skb(send_skb);
                                continue;
                        }

                        netpoll_send_skb(np, send_skb);

                        /* If there are several rx_hooks for the same address,
                           we're fine sending a single reply */
                        break;
                }
                spin_unlock_irqrestore(&npinfo->rx_lock, flags);
#endif
        }
}

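/*
 * Return true if @skb is an ICMPv6 neighbour solicitation that netpoll
 * should trap and answer itself.
 */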
static bool pkt_is_ns(struct sk_buff *skb)
{
        struct nd_msg *msg;
        struct ipv6hdr *hdr;

        if (skb->protocol != htons(ETH_P_IPV6))
                return false;
        if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
                return false;

        msg = (struct nd_msg *)skb_transport_header(skb);
        __skb_push(skb, skb->data - skb_transport_header(skb));
        hdr = ipv6_hdr(skb);

        if (hdr->nexthdr != IPPROTO_ICMPV6)
                return false;
        if (hdr->hop_limit != 255)
                return false;
        if (msg->icmph.icmp6_code != 0)
                return false;
        if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
                return false;

        return true;
}

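/*
 * Called from the device's rx path while netpoll has trapped the
 * interface.  Returns 1 if the packet was consumed (delivered to a
 * matching rx_hook, queued for a neighbour reply, or dropped), 0 if it
 * should continue up the normal network stack.
 */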
int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
{
        int proto, len, ulen;
        int hits = 0;
        const struct iphdr *iph;
        struct udphdr *uh;
        struct netpoll *np, *tmp;

        if (list_empty(&npinfo->rx_np))
                goto out;

        if (skb->dev->type != ARPHRD_ETHER)
                goto out;

        /* check if netpoll clients need ARP */
        if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) {
                skb_queue_tail(&npinfo->neigh_tx, skb);
                return 1;
        } else if (pkt_is_ns(skb) && atomic_read(&trapped)) {
                skb_queue_tail(&npinfo->neigh_tx, skb);
                return 1;
        }

        if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
                skb = vlan_untag(skb);
                if (unlikely(!skb))
                        goto out;
        }

        proto = ntohs(eth_hdr(skb)->h_proto);
        if (proto != ETH_P_IP && proto != ETH_P_IPV6)
                goto out;
        if (skb->pkt_type == PACKET_OTHERHOST)
                goto out;
        if (skb_shared(skb))
                goto out;

        if (proto == ETH_P_IP) {
                if (!pskb_may_pull(skb, sizeof(struct iphdr)))
                        goto out;
                iph = (struct iphdr *)skb->data;
                if (iph->ihl < 5 || iph->version != 4)
                        goto out;
                if (!pskb_may_pull(skb, iph->ihl*4))
                        goto out;
                iph = (struct iphdr *)skb->data;
                if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
                        goto out;

                len = ntohs(iph->tot_len);
                if (skb->len < len || len < iph->ihl*4)
                        goto out;

                /*
                 * Our transport medium may have padded the buffer out.
                 * Now we trim to the true length of the frame.
                 */
                if (pskb_trim_rcsum(skb, len))
                        goto out;

                iph = (struct iphdr *)skb->data;
                if (iph->protocol != IPPROTO_UDP)
                        goto out;

                len -= iph->ihl*4;
                uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
                ulen = ntohs(uh->len);

                if (ulen != len)
                        goto out;
                if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
                        goto out;
                list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
                        if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
                                continue;
                        if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
                                continue;
                        if (np->local_port && np->local_port != ntohs(uh->dest))
                                continue;

                        np->rx_hook(np, ntohs(uh->source),
                                       (char *)(uh+1),
                                       ulen - sizeof(struct udphdr));
                        hits++;
                }
        } else {
#if IS_ENABLED(CONFIG_IPV6)
                const struct ipv6hdr *ip6h;

                if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
                        goto out;
                ip6h = (struct ipv6hdr *)skb->data;
                if (ip6h->version != 6)
                        goto out;
                len = ntohs(ip6h->payload_len);
                if (!len)
                        goto out;
                if (len + sizeof(struct ipv6hdr) > skb->len)
                        goto out;
                if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr)))
                        goto out;
                ip6h = ipv6_hdr(skb);
                if (!pskb_may_pull(skb, sizeof(struct udphdr)))
                        goto out;
                uh = udp_hdr(skb);
                ulen = ntohs(uh->len);
                if (ulen != skb->len)
                        goto out;
                if (udp6_csum_init(skb, uh, IPPROTO_UDP))
                        goto out;
                list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
                        if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr))
                                continue;
                        if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr))
                                continue;
                        if (np->local_port && np->local_port != ntohs(uh->dest))
                                continue;

                        np->rx_hook(np, ntohs(uh->source),
                                       (char *)(uh+1),
                                       ulen - sizeof(struct udphdr));
                        hits++;
                }
#endif
        }

        if (!hits)
                goto out;

        kfree_skb(skb);
        return 1;

out:
        if (atomic_read(&trapped)) {
                kfree_skb(skb);
                return 1;
        }

        return 0;
}

void netpoll_print_options(struct netpoll *np)
{
        np_info(np, "local port %d\n", np->local_port);
        if (np->ipv6)
                np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);
        else
                np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
        np_info(np, "interface '%s'\n", np->dev_name);
        np_info(np, "remote port %d\n", np->remote_port);
        if (np->ipv6)
                np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);
        else
                np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
        np_info(np, "remote ethernet address %pM\n", np->remote_mac);
}
EXPORT_SYMBOL(netpoll_print_options);

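/*
 * Parse @str as an IPv4 or IPv6 address.  Returns 0 for IPv4, 1 for
 * IPv6 (when the kernel is built with IPv6 support), and -1 if the
 * string is neither.
 */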
static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
{
        const char *end;

        if (!strchr(str, ':') &&
            in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
                if (!*end)
                        return 0;
        }
        if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
#if IS_ENABLED(CONFIG_IPV6)
                if (!*end)
                        return 1;
#else
                return -1;
#endif
        }
        return -1;
}

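/*
 * Parse a netpoll configuration string of the form used by netconsole:
 *
 *	[src-port]@[src-ip]/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr]
 *
 * Omitted fields keep whatever defaults the caller set up.  The local
 * and remote addresses must belong to the same address family.
 */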
int netpoll_parse_options(struct netpoll *np, char *opt)
{
        char *cur = opt, *delim;
        int ipv6;

        if (*cur != '@') {
                if ((delim = strchr(cur, '@')) == NULL)
                        goto parse_failed;
                *delim = 0;
                if (kstrtou16(cur, 10, &np->local_port))
                        goto parse_failed;
                cur = delim;
        }
        cur++;

        if (*cur != '/') {
                if ((delim = strchr(cur, '/')) == NULL)
                        goto parse_failed;
                *delim = 0;
                ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
                if (ipv6 < 0)
                        goto parse_failed;
                else
                        np->ipv6 = (bool)ipv6;
                cur = delim;
        }
        cur++;

        if (*cur != ',') {
                /* parse out dev name */
                if ((delim = strchr(cur, ',')) == NULL)
                        goto parse_failed;
                *delim = 0;
                strlcpy(np->dev_name, cur, sizeof(np->dev_name));
                cur = delim;
        }
        cur++;

        if (*cur != '@') {
                /* dst port */
                if ((delim = strchr(cur, '@')) == NULL)
                        goto parse_failed;
                *delim = 0;
                if (*cur == ' ' || *cur == '\t')
                        np_info(np, "warning: whitespace is not allowed\n");
                if (kstrtou16(cur, 10, &np->remote_port))
                        goto parse_failed;
                cur = delim;
        }
        cur++;

        /* dst ip */
        if ((delim = strchr(cur, '/')) == NULL)
                goto parse_failed;
        *delim = 0;
        ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
        if (ipv6 < 0)
                goto parse_failed;
        else if (np->ipv6 != (bool)ipv6)
                goto parse_failed;
        else
                np->ipv6 = (bool)ipv6;
        cur = delim + 1;

        if (*cur != 0) {
                /* MAC address */
                if (!mac_pton(cur, np->remote_mac))
                        goto parse_failed;
        }

        netpoll_print_options(np);

        return 0;

 parse_failed:
        np_info(np, "couldn't parse config at '%s'!\n", cur);
        return -1;
}
EXPORT_SYMBOL(netpoll_parse_options);

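/*
 * Attach @np to @ndev.  The caller must hold the RTNL lock and keep the
 * device alive; the first netpoll attached to a device allocates the
 * shared netpoll_info, later ones just take a reference on it.
 */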
int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
{
        struct netpoll_info *npinfo;
        const struct net_device_ops *ops;
        unsigned long flags;
        int err;

        np->dev = ndev;
        strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
        INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);

        if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
            !ndev->netdev_ops->ndo_poll_controller) {
                np_err(np, "%s doesn't support polling, aborting\n",
                       np->dev_name);
                err = -ENOTSUPP;
                goto out;
        }

        if (!ndev->npinfo) {
                npinfo = kmalloc(sizeof(*npinfo), gfp);
                if (!npinfo) {
                        err = -ENOMEM;
                        goto out;
                }

                npinfo->rx_flags = 0;
                INIT_LIST_HEAD(&npinfo->rx_np);

                spin_lock_init(&npinfo->rx_lock);
                sema_init(&npinfo->dev_lock, 1);
                skb_queue_head_init(&npinfo->neigh_tx);
                skb_queue_head_init(&npinfo->txq);
                INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);

                atomic_set(&npinfo->refcnt, 1);

                ops = np->dev->netdev_ops;
                if (ops->ndo_netpoll_setup) {
                        err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
                        if (err)
                                goto free_npinfo;
                }
        } else {
                npinfo = rtnl_dereference(ndev->npinfo);
                atomic_inc(&npinfo->refcnt);
        }

        npinfo->netpoll = np;

        if (np->rx_hook) {
                spin_lock_irqsave(&npinfo->rx_lock, flags);
                npinfo->rx_flags |= NETPOLL_RX_ENABLED;
                list_add_tail(&np->rx, &npinfo->rx_np);
                spin_unlock_irqrestore(&npinfo->rx_lock, flags);
        }

        /* last thing to do is link it to the net device structure */
        rcu_assign_pointer(ndev->npinfo, npinfo);

        return 0;

free_npinfo:
        kfree(npinfo);
out:
        return err;
}
EXPORT_SYMBOL_GPL(__netpoll_setup);

int netpoll_setup(struct netpoll *np)
{
        struct net_device *ndev = NULL;
        struct in_device *in_dev;
        int err;

        rtnl_lock();
        if (np->dev_name) {
                struct net *net = current->nsproxy->net_ns;
                ndev = __dev_get_by_name(net, np->dev_name);
        }
        if (!ndev) {
                np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
                err = -ENODEV;
                goto unlock;
        }
        dev_hold(ndev);

        if (netdev_master_upper_dev_get(ndev)) {
                np_err(np, "%s is a slave device, aborting\n", np->dev_name);
                err = -EBUSY;
                goto put;
        }

        if (!netif_running(ndev)) {
                unsigned long atmost, atleast;

                np_info(np, "device %s not up yet, forcing it\n", np->dev_name);

                err = dev_open(ndev);

                if (err) {
                        np_err(np, "failed to open %s\n", ndev->name);
                        goto put;
                }

                rtnl_unlock();
                atleast = jiffies + HZ/10;
                atmost = jiffies + carrier_timeout * HZ;
                while (!netif_carrier_ok(ndev)) {
                        if (time_after(jiffies, atmost)) {
                                np_notice(np, "timeout waiting for carrier\n");
                                break;
                        }
                        msleep(1);
                }

                /* If carrier appears to come up instantly, we don't
                 * trust it and pause so that we don't pump all our
                 * queued console messages into the bitbucket.
                 */

                if (time_before(jiffies, atleast)) {
                        np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
                        msleep(4000);
                }
                rtnl_lock();
        }

        if (!np->local_ip.ip) {
                if (!np->ipv6) {
                        in_dev = __in_dev_get_rtnl(ndev);

                        if (!in_dev || !in_dev->ifa_list) {
                                np_err(np, "no IP address for %s, aborting\n",
                                       np->dev_name);
                                err = -EDESTADDRREQ;
                                goto put;
                        }

                        np->local_ip.ip = in_dev->ifa_list->ifa_local;
                        np_info(np, "local IP %pI4\n", &np->local_ip.ip);
                } else {
#if IS_ENABLED(CONFIG_IPV6)
                        struct inet6_dev *idev;

                        err = -EDESTADDRREQ;
                        idev = __in6_dev_get(ndev);
                        if (idev) {
                                struct inet6_ifaddr *ifp;

                                read_lock_bh(&idev->lock);
                                list_for_each_entry(ifp, &idev->addr_list, if_list) {
                                        if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
                                                continue;
                                        np->local_ip.in6 = ifp->addr;
                                        err = 0;
                                        break;
                                }
                                read_unlock_bh(&idev->lock);
                        }
                        if (err) {
                                np_err(np, "no IPv6 address for %s, aborting\n",
                                       np->dev_name);
                                goto put;
                        } else
                                np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
#else
                        np_err(np, "IPv6 is not supported on %s, aborting\n",
                               np->dev_name);
                        err = -EINVAL;
                        goto put;
#endif
                }
        }

        /* fill up the skb queue */
        refill_skbs();

        err = __netpoll_setup(np, ndev, GFP_KERNEL);
        if (err)
                goto put;

        rtnl_unlock();
        return 0;

put:
        dev_put(ndev);
unlock:
        rtnl_unlock();
        return err;
}
EXPORT_SYMBOL(netpoll_setup);

static int __init netpoll_init(void)
{
        skb_queue_head_init(&skb_pool);
        return 0;
}
core_initcall(netpoll_init);

static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
{
        struct netpoll_info *npinfo =
                        container_of(rcu_head, struct netpoll_info, rcu);

        skb_queue_purge(&npinfo->neigh_tx);
        skb_queue_purge(&npinfo->txq);

        /* we can't call cancel_delayed_work_sync here, as we are in softirq */
        cancel_delayed_work(&npinfo->tx_work);

        /* clean after last, unfinished work */
        __skb_queue_purge(&npinfo->txq);
        /* now cancel it again */
        cancel_delayed_work(&npinfo->tx_work);
        kfree(npinfo);
}

void __netpoll_cleanup(struct netpoll *np)
{
        struct netpoll_info *npinfo;
        unsigned long flags;

        /* Both callers hold the rtnl lock here: netpoll_cleanup()
         * takes it directly, and the async path takes it in
         * netpoll_async_cleanup(), so rtnl_dereference is safe.
         */
        npinfo = rtnl_dereference(np->dev->npinfo);
        if (!npinfo)
                return;

        if (!list_empty(&npinfo->rx_np)) {
                spin_lock_irqsave(&npinfo->rx_lock, flags);
                list_del(&np->rx);
                if (list_empty(&npinfo->rx_np))
                        npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
                spin_unlock_irqrestore(&npinfo->rx_lock, flags);
        }

        synchronize_srcu(&netpoll_srcu);

        if (atomic_dec_and_test(&npinfo->refcnt)) {
                const struct net_device_ops *ops;

                ops = np->dev->netdev_ops;
                if (ops->ndo_netpoll_cleanup)
                        ops->ndo_netpoll_cleanup(np->dev);

                rcu_assign_pointer(np->dev->npinfo, NULL);
                call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
        }
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);

static void netpoll_async_cleanup(struct work_struct *work)
{
        struct netpoll *np = container_of(work, struct netpoll, cleanup_work);

        rtnl_lock();
        __netpoll_cleanup(np);
        rtnl_unlock();
        kfree(np);
}

void __netpoll_free_async(struct netpoll *np)
{
        schedule_work(&np->cleanup_work);
}
EXPORT_SYMBOL_GPL(__netpoll_free_async);

void netpoll_cleanup(struct netpoll *np)
{
        if (!np->dev)
                return;

        rtnl_lock();
        __netpoll_cleanup(np);
        rtnl_unlock();

        dev_put(np->dev);
        np->dev = NULL;
}
EXPORT_SYMBOL(netpoll_cleanup);

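/*
 * netpoll_trap()/netpoll_set_trap() maintain the global trap count.
 * While it is non-zero, __netpoll_rx() consumes all traffic on netpoll
 * devices: packets that don't match a hook are dropped rather than
 * passed up the stack (presumably for low-level clients such as the
 * dump and debugger code named in the header above).
 */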
int netpoll_trap(void)
{
        return atomic_read(&trapped);
}
EXPORT_SYMBOL(netpoll_trap);

void netpoll_set_trap(int trap)
{
        if (trap)
                atomic_inc(&trapped);
        else
                atomic_dec(&trapped);
}
EXPORT_SYMBOL(netpoll_set_trap);