linux/net/core/netpoll.c
/*
 * Common framework for low-level network console, dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/moduleparam.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
#include <linux/if_arp.h>
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/if_vlan.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ip6_checksum.h>
#include <asm/unaligned.h>
#include <trace/events/napi.h>

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32

static struct sk_buff_head skb_pool;

static atomic_t trapped;

static struct srcu_struct netpoll_srcu;

#define USEC_PER_POLL   50
#define NETPOLL_RX_ENABLED  1
#define NETPOLL_RX_DROP     2

#define MAX_SKB_SIZE                                                    \
        (sizeof(struct ethhdr) +                                        \
         sizeof(struct iphdr) +                                         \
         sizeof(struct udphdr) +                                        \
         MAX_UDP_CHUNK)

static void zap_completion_queue(void);
static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
static void netpoll_async_cleanup(struct work_struct *work);

static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);

#define np_info(np, fmt, ...)                           \
        pr_info("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_err(np, fmt, ...)                            \
        pr_err("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_notice(np, fmt, ...)                         \
        pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)

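/*
 * Flush the deferred transmit queue. Frames that cannot be sent
 * because the tx queue is frozen/stopped or the driver rejects them
 * are requeued, and the work is rescheduled for a later tick.
 */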
static void queue_process(struct work_struct *work)
{
        struct netpoll_info *npinfo =
                container_of(work, struct netpoll_info, tx_work.work);
        struct sk_buff *skb;
        unsigned long flags;

        while ((skb = skb_dequeue(&npinfo->txq))) {
                struct net_device *dev = skb->dev;
                const struct net_device_ops *ops = dev->netdev_ops;
                struct netdev_queue *txq;

                if (!netif_device_present(dev) || !netif_running(dev)) {
                        __kfree_skb(skb);
                        continue;
                }

                txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

                local_irq_save(flags);
                __netif_tx_lock(txq, smp_processor_id());
                if (netif_xmit_frozen_or_stopped(txq) ||
                    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
                        skb_queue_head(&npinfo->txq, skb);
                        __netif_tx_unlock(txq);
                        local_irq_restore(flags);

                        schedule_delayed_work(&npinfo->tx_work, HZ/10);
                        return;
                }
                __netif_tx_unlock(txq);
                local_irq_restore(flags);
        }
}

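/*
 * Verify the UDP checksum of a received packet. Returns 0 when the
 * checksum is known good (or absent), non-zero otherwise.
 */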
static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
                            unsigned short ulen, __be32 saddr, __be32 daddr)
{
        __wsum psum;

        if (uh->check == 0 || skb_csum_unnecessary(skb))
                return 0;

        psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);

        if (skb->ip_summed == CHECKSUM_COMPLETE &&
            !csum_fold(csum_add(psum, skb->csum)))
                return 0;

        skb->csum = psum;

        return __skb_checksum_complete(skb);
}

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 *
 * In cases where there is bi-directional communication, reading only
 * one message at a time can lead to packets being dropped by the
 * network adapter, forcing superfluous retries and possibly timeouts.
 * Thus, we set our budget to greater than 1.
 */
static int poll_one_napi(struct netpoll_info *npinfo,
                         struct napi_struct *napi, int budget)
{
        int work;

        /* net_rx_action's ->poll() invocations and ours are
         * synchronized by this test, which is only made while
         * holding the napi->poll_lock.
         */
        if (!test_bit(NAPI_STATE_SCHED, &napi->state))
                return budget;

        npinfo->rx_flags |= NETPOLL_RX_DROP;
        atomic_inc(&trapped);
        set_bit(NAPI_STATE_NPSVC, &napi->state);

        work = napi->poll(napi, budget);
        trace_napi_poll(napi);

        clear_bit(NAPI_STATE_NPSVC, &napi->state);
        atomic_dec(&trapped);
        npinfo->rx_flags &= ~NETPOLL_RX_DROP;

        return budget - work;
}

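/*
 * Service every NAPI instance on the device that we can lock without
 * contention, sharing a single 16-packet budget between them.
 */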
static void poll_napi(struct net_device *dev)
{
        struct napi_struct *napi;
        int budget = 16;

        list_for_each_entry(napi, &dev->napi_list, dev_list) {
                if (napi->poll_owner != smp_processor_id() &&
                    spin_trylock(&napi->poll_lock)) {
                        budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
                                               napi, budget);
                        spin_unlock(&napi->poll_lock);

                        if (!budget)
                                break;
                }
        }
}

static void service_neigh_queue(struct netpoll_info *npi)
{
        if (npi) {
                struct sk_buff *skb;

                while ((skb = skb_dequeue(&npi->neigh_tx)))
                        netpoll_neigh_reply(skb, npi);
        }
}

static void netpoll_poll_dev(struct net_device *dev)
{
        const struct net_device_ops *ops;
        struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);

        /* Don't do any rx activity if the dev_lock mutex is held;
         * the dev_open/close paths use this to block netpoll activity
         * while changing device state.
         */
        if (!mutex_trylock(&ni->dev_lock))
                return;

        if (!netif_running(dev)) {
                mutex_unlock(&ni->dev_lock);
                return;
        }

        ops = dev->netdev_ops;
        if (!ops->ndo_poll_controller) {
                mutex_unlock(&ni->dev_lock);
                return;
        }

        /* Process pending work on the NIC */
        ops->ndo_poll_controller(dev);

        poll_napi(dev);

        mutex_unlock(&ni->dev_lock);

        if (dev->flags & IFF_SLAVE) {
                if (ni) {
                        struct net_device *bond_dev;
                        struct sk_buff *skb;
                        struct netpoll_info *bond_ni;

                        bond_dev = netdev_master_upper_dev_get_rcu(dev);
                        bond_ni = rcu_dereference_bh(bond_dev->npinfo);
                        while ((skb = skb_dequeue(&ni->neigh_tx))) {
                                skb->dev = bond_dev;
                                skb_queue_tail(&bond_ni->neigh_tx, skb);
                        }
                }
        }

        service_neigh_queue(ni);

        zap_completion_queue();
}

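/*
 * netpoll_rx_disable() and netpoll_rx_enable() bracket device state
 * changes in the dev_open/close paths: holding ni->dev_lock keeps
 * netpoll_poll_dev(), which only ever trylocks it, away while the
 * device is in flux. The SRCU read section pairs with the
 * synchronize_srcu() in __netpoll_cleanup() so that npinfo cannot be
 * freed out from under us.
 */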
int netpoll_rx_disable(struct net_device *dev)
{
        struct netpoll_info *ni;
        int idx;

        might_sleep();
        idx = srcu_read_lock(&netpoll_srcu);
        ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
        if (ni)
                mutex_lock(&ni->dev_lock);
        srcu_read_unlock(&netpoll_srcu, idx);
        return 0;
}
EXPORT_SYMBOL(netpoll_rx_disable);

void netpoll_rx_enable(struct net_device *dev)
{
        struct netpoll_info *ni;

        rcu_read_lock();
        ni = rcu_dereference(dev->npinfo);
        if (ni)
                mutex_unlock(&ni->dev_lock);
        rcu_read_unlock();
}
EXPORT_SYMBOL(netpoll_rx_enable);

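/* Top the pre-allocated skb pool back up to MAX_SKBS entries. */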
static void refill_skbs(void)
{
        struct sk_buff *skb;
        unsigned long flags;

        spin_lock_irqsave(&skb_pool.lock, flags);
        while (skb_pool.qlen < MAX_SKBS) {
                skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
                if (!skb)
                        break;

                __skb_queue_tail(&skb_pool, skb);
        }
        spin_unlock_irqrestore(&skb_pool.lock, flags);
}

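/*
 * Reclaim skbs parked on this CPU's free-completion queue so their
 * memory can be reused; skbs that carry a destructor are handed back
 * to dev_kfree_skb_any() rather than freed directly here.
 */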
static void zap_completion_queue(void)
{
        unsigned long flags;
        struct softnet_data *sd = &get_cpu_var(softnet_data);

        if (sd->completion_queue) {
                struct sk_buff *clist;

                local_irq_save(flags);
                clist = sd->completion_queue;
                sd->completion_queue = NULL;
                local_irq_restore(flags);

                while (clist != NULL) {
                        struct sk_buff *skb = clist;
                        clist = clist->next;
                        if (skb->destructor) {
                                atomic_inc(&skb->users);
                                dev_kfree_skb_any(skb); /* put this one back */
                        } else {
                                __kfree_skb(skb);
                        }
                }
        }

        put_cpu_var(softnet_data);
}

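/*
 * Allocate an skb for transmission, falling back to the static pool
 * and, as a last resort, polling the device a few times in the hope
 * that it completes (and thereby frees) some in-flight buffers.
 */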
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
{
        int count = 0;
        struct sk_buff *skb;

        zap_completion_queue();
        refill_skbs();
repeat:
        skb = alloc_skb(len, GFP_ATOMIC);
        if (!skb)
                skb = skb_dequeue(&skb_pool);

        if (!skb) {
                if (++count < 10) {
                        netpoll_poll_dev(np->dev);
                        goto repeat;
                }
                return NULL;
        }

        atomic_set(&skb->users, 1);
        skb_reserve(skb, reserve);
        return skb;
}

static int netpoll_owner_active(struct net_device *dev)
{
        struct napi_struct *napi;

        list_for_each_entry(napi, &dev->napi_list, dev_list) {
                if (napi->poll_owner == smp_processor_id())
                        return 1;
        }
        return 0;
}

/* call with IRQ disabled */
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
                             struct net_device *dev)
{
        int status = NETDEV_TX_BUSY;
        unsigned long tries;
        const struct net_device_ops *ops = dev->netdev_ops;
        /* It is up to the caller to keep npinfo alive. */
        struct netpoll_info *npinfo;

        WARN_ON_ONCE(!irqs_disabled());

        npinfo = rcu_dereference_bh(np->dev->npinfo);
        if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
                __kfree_skb(skb);
                return;
        }

        /* don't get messages out of order, and no recursion */
        if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
                struct netdev_queue *txq;

                txq = netdev_pick_tx(dev, skb);

                /* try until next clock tick */
                for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
                     tries > 0; --tries) {
                        if (__netif_tx_trylock(txq)) {
                                if (!netif_xmit_stopped(txq)) {
                                        if (vlan_tx_tag_present(skb) &&
                                            !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) {
                                                skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
                                                if (unlikely(!skb))
                                                        break;
                                                skb->vlan_tci = 0;
                                        }

                                        status = ops->ndo_start_xmit(skb, dev);
                                        if (status == NETDEV_TX_OK)
                                                txq_trans_update(txq);
                                }
                                __netif_tx_unlock(txq);

                                if (status == NETDEV_TX_OK)
                                        break;
                        }

                        /* tickle the device; maybe there is some cleanup */
                        netpoll_poll_dev(np->dev);

                        udelay(USEC_PER_POLL);
                }

                WARN_ONCE(!irqs_disabled(),
                        "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
                        dev->name, ops->ndo_start_xmit);
        }

        if (status != NETDEV_TX_OK) {
                skb_queue_tail(&npinfo->txq, skb);
                schedule_delayed_work(&npinfo->tx_work, 0);
        }
}
EXPORT_SYMBOL(netpoll_send_skb_on_dev);

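/*
 * Build a UDP datagram around @msg and transmit it. The frame is
 * assembled back to front: the payload is copied in first, then the
 * UDP, IP (v4 or v6), and Ethernet headers are each prepended with
 * skb_push().
 */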
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
        int total_len, ip_len, udp_len;
        struct sk_buff *skb;
        struct udphdr *udph;
        struct iphdr *iph;
        struct ethhdr *eth;
        static atomic_t ip_ident;
        struct ipv6hdr *ip6h;

        udp_len = len + sizeof(*udph);
        if (np->ipv6)
                ip_len = udp_len + sizeof(*ip6h);
        else
                ip_len = udp_len + sizeof(*iph);

        total_len = ip_len + LL_RESERVED_SPACE(np->dev);

        skb = find_skb(np, total_len + np->dev->needed_tailroom,
                       total_len - len);
        if (!skb)
                return;

        skb_copy_to_linear_data(skb, msg, len);
        skb_put(skb, len);

        skb_push(skb, sizeof(*udph));
        skb_reset_transport_header(skb);
        udph = udp_hdr(skb);
        udph->source = htons(np->local_port);
        udph->dest = htons(np->remote_port);
        udph->len = htons(udp_len);

        if (np->ipv6) {
                udph->check = 0;
                udph->check = csum_ipv6_magic(&np->local_ip.in6,
                                              &np->remote_ip.in6,
                                              udp_len, IPPROTO_UDP,
                                              csum_partial(udph, udp_len, 0));
                if (udph->check == 0)
                        udph->check = CSUM_MANGLED_0;

                skb_push(skb, sizeof(*ip6h));
                skb_reset_network_header(skb);
                ip6h = ipv6_hdr(skb);

                /* ip6h->version = 6; ip6h->priority = 0; */
                put_unaligned(0x60, (unsigned char *)ip6h);
                ip6h->flow_lbl[0] = 0;
                ip6h->flow_lbl[1] = 0;
                ip6h->flow_lbl[2] = 0;

                ip6h->payload_len = htons(sizeof(struct udphdr) + len);
                ip6h->nexthdr = IPPROTO_UDP;
                ip6h->hop_limit = 32;
                ip6h->saddr = np->local_ip.in6;
                ip6h->daddr = np->remote_ip.in6;

                eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
                skb_reset_mac_header(skb);
                skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
        } else {
                udph->check = 0;
                udph->check = csum_tcpudp_magic(np->local_ip.ip,
                                                np->remote_ip.ip,
                                                udp_len, IPPROTO_UDP,
                                                csum_partial(udph, udp_len, 0));
                if (udph->check == 0)
                        udph->check = CSUM_MANGLED_0;

                skb_push(skb, sizeof(*iph));
                skb_reset_network_header(skb);
                iph = ip_hdr(skb);

                /* iph->version = 4; iph->ihl = 5; */
                put_unaligned(0x45, (unsigned char *)iph);
                iph->tos      = 0;
                put_unaligned(htons(ip_len), &(iph->tot_len));
                iph->id       = htons(atomic_inc_return(&ip_ident));
                iph->frag_off = 0;
                iph->ttl      = 64;
                iph->protocol = IPPROTO_UDP;
                iph->check    = 0;
                put_unaligned(np->local_ip.ip, &(iph->saddr));
                put_unaligned(np->remote_ip.ip, &(iph->daddr));
                iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

                eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
                skb_reset_mac_header(skb);
                skb->protocol = eth->h_proto = htons(ETH_P_IP);
        }

        memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
        memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);

        skb->dev = np->dev;

        netpoll_send_skb(np, skb);
}
EXPORT_SYMBOL(netpoll_send_udp);

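/*
 * Answer ARP requests (IPv4) and neighbour solicitations (IPv6) that
 * target an address registered by one of our rx netpolls, building
 * the reply by hand since the normal stack may not be running while
 * packets are trapped.
 */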
static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
{
        int size, type = ARPOP_REPLY;
        __be32 sip, tip;
        unsigned char *sha;
        struct sk_buff *send_skb;
        struct netpoll *np, *tmp;
        unsigned long flags;
        int hlen, tlen;
        int hits = 0, proto;

        if (list_empty(&npinfo->rx_np))
                return;

        /* Before checking the packet, we do some early
           inspection of whether this is interesting at all */
        spin_lock_irqsave(&npinfo->rx_lock, flags);
        list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
                if (np->dev == skb->dev)
                        hits++;
        }
        spin_unlock_irqrestore(&npinfo->rx_lock, flags);

        /* No netpoll struct is using this dev */
        if (!hits)
                return;

        proto = ntohs(eth_hdr(skb)->h_proto);
        if (proto == ETH_P_IP) {
                struct arphdr *arp;
                unsigned char *arp_ptr;

                /* No arp on this interface */
                if (skb->dev->flags & IFF_NOARP)
                        return;

                if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
                        return;

                skb_reset_network_header(skb);
                skb_reset_transport_header(skb);
                arp = arp_hdr(skb);

                if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
                     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
                    arp->ar_pro != htons(ETH_P_IP) ||
                    arp->ar_op != htons(ARPOP_REQUEST))
                        return;

                arp_ptr = (unsigned char *)(arp + 1);
                /* save the location of the src hw addr */
                sha = arp_ptr;
                arp_ptr += skb->dev->addr_len;
                memcpy(&sip, arp_ptr, 4);
                arp_ptr += 4;
                /* If we actually cared about dst hw addr,
                   it would get copied here */
                arp_ptr += skb->dev->addr_len;
                memcpy(&tip, arp_ptr, 4);

                /* Should we ignore arp? */
                if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
                        return;

                size = arp_hdr_len(skb->dev);

                spin_lock_irqsave(&npinfo->rx_lock, flags);
                list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
                        if (tip != np->local_ip.ip)
                                continue;

                        hlen = LL_RESERVED_SPACE(np->dev);
                        tlen = np->dev->needed_tailroom;
                        send_skb = find_skb(np, size + hlen + tlen, hlen);
                        if (!send_skb)
                                continue;

                        skb_reset_network_header(send_skb);
                        arp = (struct arphdr *) skb_put(send_skb, size);
                        send_skb->dev = skb->dev;
                        send_skb->protocol = htons(ETH_P_ARP);

                        /* Fill the device header for the ARP frame */
                        if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP,
                                            sha, np->dev->dev_addr,
                                            send_skb->len) < 0) {
                                kfree_skb(send_skb);
                                continue;
                        }

                        /*
                         * Fill out the arp protocol part.
                         *
                         * We only support the ethernet device type,
                         * which (according to RFC 1390) should
                         * always equal 1 (Ethernet).
                         */
                        arp->ar_hrd = htons(np->dev->type);
                        arp->ar_pro = htons(ETH_P_IP);
                        arp->ar_hln = np->dev->addr_len;
                        arp->ar_pln = 4;
                        arp->ar_op = htons(type);

                        arp_ptr = (unsigned char *)(arp + 1);
                        memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
                        arp_ptr += np->dev->addr_len;
                        memcpy(arp_ptr, &tip, 4);
                        arp_ptr += 4;
                        memcpy(arp_ptr, sha, np->dev->addr_len);
                        arp_ptr += np->dev->addr_len;
                        memcpy(arp_ptr, &sip, 4);

                        netpoll_send_skb(np, send_skb);

                        /* If there are several rx_hooks for the same
                           address, we are fine sending a single reply */
                        break;
                }
                spin_unlock_irqrestore(&npinfo->rx_lock, flags);
        } else if (proto == ETH_P_IPV6) {
#if IS_ENABLED(CONFIG_IPV6)
                struct nd_msg *msg;
                u8 *lladdr = NULL;
                struct ipv6hdr *hdr;
                struct icmp6hdr *icmp6h;
                const struct in6_addr *saddr;
                const struct in6_addr *daddr;
                struct inet6_dev *in6_dev = NULL;
                struct in6_addr *target;

                in6_dev = in6_dev_get(skb->dev);
                if (!in6_dev || !in6_dev->cnf.accept_ra)
                        return;

                if (!pskb_may_pull(skb, skb->len))
                        return;

                msg = (struct nd_msg *)skb_transport_header(skb);

                __skb_push(skb, skb->data - skb_transport_header(skb));

                if (ipv6_hdr(skb)->hop_limit != 255)
                        return;
                if (msg->icmph.icmp6_code != 0)
                        return;
                if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
                        return;

                saddr = &ipv6_hdr(skb)->saddr;
                daddr = &ipv6_hdr(skb)->daddr;

                size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);

                spin_lock_irqsave(&npinfo->rx_lock, flags);
                list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
                        if (!ipv6_addr_equal(daddr, &np->local_ip.in6))
                                continue;

                        hlen = LL_RESERVED_SPACE(np->dev);
                        tlen = np->dev->needed_tailroom;
                        send_skb = find_skb(np, size + hlen + tlen, hlen);
                        if (!send_skb)
                                continue;

                        send_skb->protocol = htons(ETH_P_IPV6);
                        send_skb->dev = skb->dev;

                        skb_reset_network_header(send_skb);
                        skb_put(send_skb, sizeof(struct ipv6hdr));
                        hdr = ipv6_hdr(send_skb);

                        *(__be32 *)hdr = htonl(0x60000000);

                        hdr->payload_len = htons(size);
                        hdr->nexthdr = IPPROTO_ICMPV6;
                        hdr->hop_limit = 255;
                        hdr->saddr = *saddr;
                        hdr->daddr = *daddr;

                        send_skb->transport_header = send_skb->tail;
                        skb_put(send_skb, size);

                        /* Fill the advertisement into the reply skb, not the
                         * received one. */
                        icmp6h = (struct icmp6hdr *)skb_transport_header(send_skb);
                        icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
                        icmp6h->icmp6_router = 0;
                        icmp6h->icmp6_solicited = 1;
                        target = (struct in6_addr *)(skb_transport_header(send_skb) +
                                                     sizeof(struct icmp6hdr));
                        *target = msg->target;
                        icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
                                                              IPPROTO_ICMPV6,
                                                              csum_partial(icmp6h,
                                                                           size, 0));

                        if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6,
                                            lladdr, np->dev->dev_addr,
                                            send_skb->len) < 0) {
                                kfree_skb(send_skb);
                                continue;
                        }

                        netpoll_send_skb(np, send_skb);

                        /* If there are several rx_hooks for the same
                           address, we are fine sending a single reply */
                        break;
                }
                spin_unlock_irqrestore(&npinfo->rx_lock, flags);
#endif
        }
}

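/* Does this skb look like an IPv6 neighbour solicitation? */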
static bool pkt_is_ns(struct sk_buff *skb)
{
        struct nd_msg *msg;
        struct ipv6hdr *hdr;

        if (skb->protocol != htons(ETH_P_IPV6))
                return false;
        if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
                return false;

        msg = (struct nd_msg *)skb_transport_header(skb);
        __skb_push(skb, skb->data - skb_transport_header(skb));
        hdr = ipv6_hdr(skb);

        if (hdr->nexthdr != IPPROTO_ICMPV6)
                return false;
        if (hdr->hop_limit != 255)
                return false;
        if (msg->icmph.icmp6_code != 0)
                return false;
        if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
                return false;

        return true;
}

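/*
 * The receive-path hook: deliver trapped UDP packets to the matching
 * netpoll's rx_hook. Returns 1 if the skb was consumed (including the
 * case where we are trapped and simply drop it) and 0 to let the
 * normal network stack process it.
 */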
int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
{
        int proto, len, ulen;
        int hits = 0;
        const struct iphdr *iph;
        struct udphdr *uh;
        struct netpoll *np, *tmp;

        if (list_empty(&npinfo->rx_np))
                goto out;

        if (skb->dev->type != ARPHRD_ETHER)
                goto out;

        /* check if netpoll clients need ARP */
        if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) {
                skb_queue_tail(&npinfo->neigh_tx, skb);
                return 1;
        } else if (pkt_is_ns(skb) && atomic_read(&trapped)) {
                skb_queue_tail(&npinfo->neigh_tx, skb);
                return 1;
        }

        if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
                skb = vlan_untag(skb);
                if (unlikely(!skb))
                        goto out;
        }

        proto = ntohs(eth_hdr(skb)->h_proto);
        if (proto != ETH_P_IP && proto != ETH_P_IPV6)
                goto out;
        if (skb->pkt_type == PACKET_OTHERHOST)
                goto out;
        if (skb_shared(skb))
                goto out;

        if (proto == ETH_P_IP) {
                if (!pskb_may_pull(skb, sizeof(struct iphdr)))
                        goto out;
                iph = (struct iphdr *)skb->data;
                if (iph->ihl < 5 || iph->version != 4)
                        goto out;
                if (!pskb_may_pull(skb, iph->ihl*4))
                        goto out;
                iph = (struct iphdr *)skb->data;
                if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
                        goto out;

                len = ntohs(iph->tot_len);
                if (skb->len < len || len < iph->ihl*4)
                        goto out;

                /*
                 * Our transport medium may have padded the buffer out.
                 * Now we trim to the true length of the frame.
                 */
                if (pskb_trim_rcsum(skb, len))
                        goto out;

                iph = (struct iphdr *)skb->data;
                if (iph->protocol != IPPROTO_UDP)
                        goto out;

                len -= iph->ihl*4;
                uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
                ulen = ntohs(uh->len);

                if (ulen != len)
                        goto out;
                if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
                        goto out;
                list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
                        if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
                                continue;
                        if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
                                continue;
                        if (np->local_port && np->local_port != ntohs(uh->dest))
                                continue;

                        np->rx_hook(np, ntohs(uh->source),
                                    (char *)(uh + 1),
                                    ulen - sizeof(struct udphdr));
                        hits++;
                }
        } else {
#if IS_ENABLED(CONFIG_IPV6)
                const struct ipv6hdr *ip6h;

                if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
                        goto out;
                ip6h = (struct ipv6hdr *)skb->data;
                if (ip6h->version != 6)
                        goto out;
                len = ntohs(ip6h->payload_len);
                if (!len)
                        goto out;
                if (len + sizeof(struct ipv6hdr) > skb->len)
                        goto out;
                if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr)))
                        goto out;
                ip6h = ipv6_hdr(skb);
                if (!pskb_may_pull(skb, sizeof(struct udphdr)))
                        goto out;
                uh = udp_hdr(skb);
                ulen = ntohs(uh->len);
                if (ulen != skb->len)
                        goto out;
                if (udp6_csum_init(skb, uh, IPPROTO_UDP))
                        goto out;
                list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
                        if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr))
                                continue;
                        if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr))
                                continue;
                        if (np->local_port && np->local_port != ntohs(uh->dest))
                                continue;

                        np->rx_hook(np, ntohs(uh->source),
                                    (char *)(uh + 1),
                                    ulen - sizeof(struct udphdr));
                        hits++;
                }
#endif
        }

        if (!hits)
                goto out;

        kfree_skb(skb);
        return 1;

out:
        if (atomic_read(&trapped)) {
                kfree_skb(skb);
                return 1;
        }

        return 0;
}

void netpoll_print_options(struct netpoll *np)
{
        np_info(np, "local port %d\n", np->local_port);
        if (np->ipv6)
                np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);
        else
                np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
        np_info(np, "interface '%s'\n", np->dev_name);
        np_info(np, "remote port %d\n", np->remote_port);
        if (np->ipv6)
                np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);
        else
                np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
        np_info(np, "remote ethernet address %pM\n", np->remote_mac);
}
EXPORT_SYMBOL(netpoll_print_options);

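/* Returns 0 if @str is a valid IPv4 address, 1 if IPv6, -1 on error. */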
static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
{
        const char *end;

        if (!strchr(str, ':') &&
            in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
                if (!*end)
                        return 0;
        }
        if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
#if IS_ENABLED(CONFIG_IPV6)
                if (!*end)
                        return 1;
#else
                return -1;
#endif
        }
        return -1;
}

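/*
 * Parse a netconsole-style configuration string:
 *
 *   [src-port]@[src-ip]/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr]
 *
 * e.g. 4444@10.0.0.1/eth1,9353@10.0.0.2/12:34:56:78:9a:bc
 *
 * Fields whose delimiter appears first are left at their defaults.
 */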
int netpoll_parse_options(struct netpoll *np, char *opt)
{
        char *cur = opt, *delim;
        int ipv6;

        if (*cur != '@') {
                if ((delim = strchr(cur, '@')) == NULL)
                        goto parse_failed;
                *delim = 0;
                if (kstrtou16(cur, 10, &np->local_port))
                        goto parse_failed;
                cur = delim;
        }
        cur++;

        if (*cur != '/') {
                if ((delim = strchr(cur, '/')) == NULL)
                        goto parse_failed;
                *delim = 0;
                ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
                if (ipv6 < 0)
                        goto parse_failed;
                else
                        np->ipv6 = (bool)ipv6;
                cur = delim;
        }
        cur++;

        if (*cur != ',') {
                /* parse out dev name */
                if ((delim = strchr(cur, ',')) == NULL)
                        goto parse_failed;
                *delim = 0;
                strlcpy(np->dev_name, cur, sizeof(np->dev_name));
                cur = delim;
        }
        cur++;

        if (*cur != '@') {
                /* dst port */
                if ((delim = strchr(cur, '@')) == NULL)
                        goto parse_failed;
                *delim = 0;
                if (*cur == ' ' || *cur == '\t')
                        np_info(np, "warning: whitespace is not allowed\n");
                if (kstrtou16(cur, 10, &np->remote_port))
                        goto parse_failed;
                cur = delim;
        }
        cur++;

        /* dst ip */
        if ((delim = strchr(cur, '/')) == NULL)
                goto parse_failed;
        *delim = 0;
        ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
        if (ipv6 < 0)
                goto parse_failed;
        else if (np->ipv6 != (bool)ipv6)
                goto parse_failed;
        else
                np->ipv6 = (bool)ipv6;
        cur = delim + 1;

        if (*cur != 0) {
                /* MAC address */
                if (!mac_pton(cur, np->remote_mac))
                        goto parse_failed;
        }

        netpoll_print_options(np);

        return 0;

 parse_failed:
        np_info(np, "couldn't parse config at '%s'!\n", cur);
        return -1;
}
EXPORT_SYMBOL(netpoll_parse_options);

int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
{
        struct netpoll_info *npinfo;
        const struct net_device_ops *ops;
        unsigned long flags;
        int err;

        np->dev = ndev;
        strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
        INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);

        if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
            !ndev->netdev_ops->ndo_poll_controller) {
                np_err(np, "%s doesn't support polling, aborting\n",
                       np->dev_name);
                err = -ENOTSUPP;
                goto out;
        }

        if (!ndev->npinfo) {
                npinfo = kmalloc(sizeof(*npinfo), gfp);
                if (!npinfo) {
                        err = -ENOMEM;
                        goto out;
                }

                npinfo->rx_flags = 0;
                INIT_LIST_HEAD(&npinfo->rx_np);

                spin_lock_init(&npinfo->rx_lock);
                mutex_init(&npinfo->dev_lock);
                skb_queue_head_init(&npinfo->neigh_tx);
                skb_queue_head_init(&npinfo->txq);
                INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);

                atomic_set(&npinfo->refcnt, 1);

                ops = np->dev->netdev_ops;
                if (ops->ndo_netpoll_setup) {
                        err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
                        if (err)
                                goto free_npinfo;
                }
        } else {
                npinfo = rtnl_dereference(ndev->npinfo);
                atomic_inc(&npinfo->refcnt);
        }

        npinfo->netpoll = np;

        if (np->rx_hook) {
                spin_lock_irqsave(&npinfo->rx_lock, flags);
                npinfo->rx_flags |= NETPOLL_RX_ENABLED;
                list_add_tail(&np->rx, &npinfo->rx_np);
                spin_unlock_irqrestore(&npinfo->rx_lock, flags);
        }

        /* last thing to do is link it to the net device structure */
        rcu_assign_pointer(ndev->npinfo, npinfo);

        return 0;

free_npinfo:
        kfree(npinfo);
out:
        return err;
}
EXPORT_SYMBOL_GPL(__netpoll_setup);

int netpoll_setup(struct netpoll *np)
{
        struct net_device *ndev = NULL;
        struct in_device *in_dev;
        int err;

        rtnl_lock();
        if (np->dev_name[0]) {
                struct net *net = current->nsproxy->net_ns;

                ndev = __dev_get_by_name(net, np->dev_name);
        }
        if (!ndev) {
                np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
                err = -ENODEV;
                goto unlock;
        }
        dev_hold(ndev);

        if (netdev_master_upper_dev_get(ndev)) {
                np_err(np, "%s is a slave device, aborting\n", np->dev_name);
                err = -EBUSY;
                goto put;
        }

        if (!netif_running(ndev)) {
                unsigned long atmost, atleast;

                np_info(np, "device %s not up yet, forcing it\n", np->dev_name);

                err = dev_open(ndev);
                if (err) {
                        np_err(np, "failed to open %s\n", ndev->name);
                        goto put;
                }

                rtnl_unlock();
                atleast = jiffies + HZ/10;
                atmost = jiffies + carrier_timeout * HZ;
                while (!netif_carrier_ok(ndev)) {
                        if (time_after(jiffies, atmost)) {
                                np_notice(np, "timeout waiting for carrier\n");
                                break;
                        }
                        msleep(1);
                }

                /* If carrier appears to come up instantly, we don't
                 * trust it and pause so that we don't pump all our
                 * queued console messages into the bitbucket.
                 */
                if (time_before(jiffies, atleast)) {
                        np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
                        msleep(4000);
                }
                rtnl_lock();
        }

        if (!np->local_ip.ip) {
                if (!np->ipv6) {
                        in_dev = __in_dev_get_rtnl(ndev);

                        if (!in_dev || !in_dev->ifa_list) {
                                np_err(np, "no IP address for %s, aborting\n",
                                       np->dev_name);
                                err = -EDESTADDRREQ;
                                goto put;
                        }

                        np->local_ip.ip = in_dev->ifa_list->ifa_local;
                        np_info(np, "local IP %pI4\n", &np->local_ip.ip);
                } else {
#if IS_ENABLED(CONFIG_IPV6)
                        struct inet6_dev *idev;

                        err = -EDESTADDRREQ;
                        idev = __in6_dev_get(ndev);
                        if (idev) {
                                struct inet6_ifaddr *ifp;

                                read_lock_bh(&idev->lock);
                                list_for_each_entry(ifp, &idev->addr_list, if_list) {
                                        if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
                                                continue;
                                        np->local_ip.in6 = ifp->addr;
                                        err = 0;
                                        break;
                                }
                                read_unlock_bh(&idev->lock);
                        }
                        if (err) {
                                np_err(np, "no IPv6 address for %s, aborting\n",
                                       np->dev_name);
                                goto put;
                        } else
                                np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
#else
                        np_err(np, "IPv6 is not supported on %s, aborting\n",
                               np->dev_name);
                        err = -EINVAL;
                        goto put;
#endif
                }
        }

        /* fill up the skb queue */
        refill_skbs();

        err = __netpoll_setup(np, ndev, GFP_KERNEL);
        if (err)
                goto put;

        rtnl_unlock();
        return 0;

put:
        dev_put(ndev);
unlock:
        rtnl_unlock();
        return err;
}
EXPORT_SYMBOL(netpoll_setup);

static int __init netpoll_init(void)
{
        skb_queue_head_init(&skb_pool);
        init_srcu_struct(&netpoll_srcu);
        return 0;
}
core_initcall(netpoll_init);

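/*
 * Final teardown of a netpoll_info, run from an RCU callback once the
 * last reference is gone: purge the queues, stop the tx work, and
 * free the structure.
 */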
static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
{
        struct netpoll_info *npinfo =
                        container_of(rcu_head, struct netpoll_info, rcu);

        skb_queue_purge(&npinfo->neigh_tx);
        skb_queue_purge(&npinfo->txq);

        /* we can't call cancel_delayed_work_sync here, as we are in softirq */
        cancel_delayed_work(&npinfo->tx_work);

        /* clean after last, unfinished work */
        __skb_queue_purge(&npinfo->txq);
        /* now cancel it again */
        cancel_delayed_work(&npinfo->tx_work);
        kfree(npinfo);
}

void __netpoll_cleanup(struct netpoll *np)
{
        struct netpoll_info *npinfo;
        unsigned long flags;

        /* rtnl_dereference would be preferable here but
         * rcu_cleanup_netpoll path can put us in here safely without
         * holding the rtnl, so plain rcu_dereference it is
         */
        npinfo = rtnl_dereference(np->dev->npinfo);
        if (!npinfo)
                return;

        if (!list_empty(&npinfo->rx_np)) {
                spin_lock_irqsave(&npinfo->rx_lock, flags);
                list_del(&np->rx);
                if (list_empty(&npinfo->rx_np))
                        npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
                spin_unlock_irqrestore(&npinfo->rx_lock, flags);
        }

        synchronize_srcu(&netpoll_srcu);

        if (atomic_dec_and_test(&npinfo->refcnt)) {
                const struct net_device_ops *ops;

                ops = np->dev->netdev_ops;
                if (ops->ndo_netpoll_cleanup)
                        ops->ndo_netpoll_cleanup(np->dev);

                rcu_assign_pointer(np->dev->npinfo, NULL);
                call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
        }
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);

static void netpoll_async_cleanup(struct work_struct *work)
{
        struct netpoll *np = container_of(work, struct netpoll, cleanup_work);

        rtnl_lock();
        __netpoll_cleanup(np);
        rtnl_unlock();
        kfree(np);
}

void __netpoll_free_async(struct netpoll *np)
{
        schedule_work(&np->cleanup_work);
}
EXPORT_SYMBOL_GPL(__netpoll_free_async);

void netpoll_cleanup(struct netpoll *np)
{
        if (!np->dev)
                return;

        rtnl_lock();
        __netpoll_cleanup(np);
        rtnl_unlock();

        dev_put(np->dev);
        np->dev = NULL;
}
EXPORT_SYMBOL(netpoll_cleanup);

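/*
 * While 'trapped' is non-zero, __netpoll_rx() consumes matching (and
 * otherwise unclaimed) packets instead of handing them to the stack.
 */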
int netpoll_trap(void)
{
        return atomic_read(&trapped);
}
EXPORT_SYMBOL(netpoll_trap);

void netpoll_set_trap(int trap)
{
        if (trap)
                atomic_inc(&trapped);
        else
                atomic_dec(&trapped);
}
EXPORT_SYMBOL(netpoll_set_trap);