linux/net/core/drop_monitor.c
<<
>>
Prefs
   1/*
   2 * Monitoring code for network dropped packet alerts
   3 *
   4 * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com>
   5 */
   6
   7#include <linux/netdevice.h>
   8#include <linux/etherdevice.h>
   9#include <linux/string.h>
  10#include <linux/if_arp.h>
  11#include <linux/inetdevice.h>
  12#include <linux/inet.h>
  13#include <linux/interrupt.h>
  14#include <linux/netpoll.h>
  15#include <linux/sched.h>
  16#include <linux/delay.h>
  17#include <linux/types.h>
  18#include <linux/workqueue.h>
  19#include <linux/netlink.h>
  20#include <linux/net_dropmon.h>
  21#include <linux/percpu.h>
  22#include <linux/timer.h>
  23#include <linux/bitops.h>
  24#include <linux/slab.h>
  25#include <net/genetlink.h>
  26#include <net/netevent.h>
  27
  28#include <trace/events/skb.h>
  29#include <trace/events/napi.h>
  30
  31#include <asm/unaligned.h>
  32
  33#define TRACE_ON 1
  34#define TRACE_OFF 0
  35
  36static void send_dm_alert(struct work_struct *unused);
  37
  38
  39/*
  40 * Globals, our netlink socket pointer
  41 * and the work handle that will send up
  42 * netlink alerts
  43 */
  44static int trace_state = TRACE_OFF;
  45static DEFINE_SPINLOCK(trace_state_lock);
  46
  47struct per_cpu_dm_data {
  48        struct work_struct dm_alert_work;
  49        struct sk_buff *skb;
  50        atomic_t dm_hit_count;
  51        struct timer_list send_timer;
  52};
  53
  54struct dm_hw_stat_delta {
  55        struct net_device *dev;
  56        unsigned long last_rx;
  57        struct list_head list;
  58        struct rcu_head rcu;
  59        unsigned long last_drop_val;
  60};
  61
  62static struct genl_family net_drop_monitor_family = {
  63        .id             = GENL_ID_GENERATE,
  64        .hdrsize        = 0,
  65        .name           = "NET_DM",
  66        .version        = 2,
  67        .maxattr        = NET_DM_CMD_MAX,
  68};
  69
  70static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
  71
  72static int dm_hit_limit = 64;
  73static int dm_delay = 1;
  74static unsigned long dm_hw_check_delta = 2*HZ;
  75static LIST_HEAD(hw_stats_list);
  76
  77static void reset_per_cpu_data(struct per_cpu_dm_data *data)
  78{
  79        size_t al;
  80        struct net_dm_alert_msg *msg;
  81        struct nlattr *nla;
  82
  83        al = sizeof(struct net_dm_alert_msg);
  84        al += dm_hit_limit * sizeof(struct net_dm_drop_point);
  85        al += sizeof(struct nlattr);
  86
  87        data->skb = genlmsg_new(al, GFP_KERNEL);
  88        genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family,
  89                        0, NET_DM_CMD_ALERT);
  90        nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg));
  91        msg = nla_data(nla);
  92        memset(msg, 0, al);
  93        atomic_set(&data->dm_hit_count, dm_hit_limit);
  94}
  95
  96static void send_dm_alert(struct work_struct *unused)
  97{
  98        struct sk_buff *skb;
  99        struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
 100
 101        /*
 102         * Grab the skb we're about to send
 103         */
 104        skb = data->skb;
 105
 106        /*
 107         * Replace it with a new one
 108         */
 109        reset_per_cpu_data(data);
 110
 111        /*
 112         * Ship it!
 113         */
 114        genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
 115
 116}
 117
 118/*
 119 * This is the timer function to delay the sending of an alert
 120 * in the event that more drops will arrive during the
 121 * hysteresis period.  Note that it operates under the timer interrupt
 122 * so we don't need to disable preemption here
 123 */
 124static void sched_send_work(unsigned long unused)
 125{
 126        struct per_cpu_dm_data *data =  &__get_cpu_var(dm_cpu_data);
 127
 128        schedule_work(&data->dm_alert_work);
 129}
 130
 131static void trace_drop_common(struct sk_buff *skb, void *location)
 132{
 133        struct net_dm_alert_msg *msg;
 134        struct nlmsghdr *nlh;
 135        struct nlattr *nla;
 136        int i;
 137        struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
 138
 139
 140        if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
 141                /*
 142                 * we're already at zero, discard this hit
 143                 */
 144                goto out;
 145        }
 146
 147        nlh = (struct nlmsghdr *)data->skb->data;
 148        nla = genlmsg_data(nlmsg_data(nlh));
 149        msg = nla_data(nla);
 150        for (i = 0; i < msg->entries; i++) {
 151                if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
 152                        msg->points[i].count++;
 153                        goto out;
 154                }
 155        }
 156
 157        /*
 158         * We need to create a new entry
 159         */
 160        __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point));
 161        nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
 162        memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
 163        msg->points[msg->entries].count = 1;
 164        msg->entries++;
 165
 166        if (!timer_pending(&data->send_timer)) {
 167                data->send_timer.expires = jiffies + dm_delay * HZ;
 168                add_timer_on(&data->send_timer, smp_processor_id());
 169        }
 170
 171out:
 172        return;
 173}
 174
 175static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
 176{
 177        trace_drop_common(skb, location);
 178}
 179
 180static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
 181{
 182        struct dm_hw_stat_delta *new_stat;
 183
 184        /*
 185         * Don't check napi structures with no associated device
 186         */
 187        if (!napi->dev)
 188                return;
 189
 190        rcu_read_lock();
 191        list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
 192                /*
 193                 * only add a note to our monitor buffer if:
 194                 * 1) this is the dev we received on
 195                 * 2) its after the last_rx delta
 196                 * 3) our rx_dropped count has gone up
 197                 */
 198                if ((new_stat->dev == napi->dev)  &&
 199                    (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
 200                    (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
 201                        trace_drop_common(NULL, NULL);
 202                        new_stat->last_drop_val = napi->dev->stats.rx_dropped;
 203                        new_stat->last_rx = jiffies;
 204                        break;
 205                }
 206        }
 207        rcu_read_unlock();
 208}
 209
 210
 211static void free_dm_hw_stat(struct rcu_head *head)
 212{
 213        struct dm_hw_stat_delta *n;
 214        n = container_of(head, struct dm_hw_stat_delta, rcu);
 215        kfree(n);
 216}
 217
 218static int set_all_monitor_traces(int state)
 219{
 220        int rc = 0;
 221        struct dm_hw_stat_delta *new_stat = NULL;
 222        struct dm_hw_stat_delta *temp;
 223
 224        spin_lock(&trace_state_lock);
 225
 226        if (state == trace_state) {
 227                rc = -EAGAIN;
 228                goto out_unlock;
 229        }
 230
 231        switch (state) {
 232        case TRACE_ON:
 233                rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
 234                rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL);
 235                break;
 236        case TRACE_OFF:
 237                rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
 238                rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
 239
 240                tracepoint_synchronize_unregister();
 241
 242                /*
 243                 * Clean the device list
 244                 */
 245                list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
 246                        if (new_stat->dev == NULL) {
 247                                list_del_rcu(&new_stat->list);
 248                                call_rcu(&new_stat->rcu, free_dm_hw_stat);
 249                        }
 250                }
 251                break;
 252        default:
 253                rc = 1;
 254                break;
 255        }
 256
 257        if (!rc)
 258                trace_state = state;
 259        else
 260                rc = -EINPROGRESS;
 261
 262out_unlock:
 263        spin_unlock(&trace_state_lock);
 264
 265        return rc;
 266}
 267
 268
 269static int net_dm_cmd_config(struct sk_buff *skb,
 270                        struct genl_info *info)
 271{
 272        return -ENOTSUPP;
 273}
 274
 275static int net_dm_cmd_trace(struct sk_buff *skb,
 276                        struct genl_info *info)
 277{
 278        switch (info->genlhdr->cmd) {
 279        case NET_DM_CMD_START:
 280                return set_all_monitor_traces(TRACE_ON);
 281                break;
 282        case NET_DM_CMD_STOP:
 283                return set_all_monitor_traces(TRACE_OFF);
 284                break;
 285        }
 286
 287        return -ENOTSUPP;
 288}
 289
 290static int dropmon_net_event(struct notifier_block *ev_block,
 291                        unsigned long event, void *ptr)
 292{
 293        struct net_device *dev = ptr;
 294        struct dm_hw_stat_delta *new_stat = NULL;
 295        struct dm_hw_stat_delta *tmp;
 296
 297        switch (event) {
 298        case NETDEV_REGISTER:
 299                new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);
 300
 301                if (!new_stat)
 302                        goto out;
 303
 304                new_stat->dev = dev;
 305                new_stat->last_rx = jiffies;
 306                spin_lock(&trace_state_lock);
 307                list_add_rcu(&new_stat->list, &hw_stats_list);
 308                spin_unlock(&trace_state_lock);
 309                break;
 310        case NETDEV_UNREGISTER:
 311                spin_lock(&trace_state_lock);
 312                list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
 313                        if (new_stat->dev == dev) {
 314                                new_stat->dev = NULL;
 315                                if (trace_state == TRACE_OFF) {
 316                                        list_del_rcu(&new_stat->list);
 317                                        call_rcu(&new_stat->rcu, free_dm_hw_stat);
 318                                        break;
 319                                }
 320                        }
 321                }
 322                spin_unlock(&trace_state_lock);
 323                break;
 324        }
 325out:
 326        return NOTIFY_DONE;
 327}
 328
 329static struct genl_ops dropmon_ops[] = {
 330        {
 331                .cmd = NET_DM_CMD_CONFIG,
 332                .doit = net_dm_cmd_config,
 333        },
 334        {
 335                .cmd = NET_DM_CMD_START,
 336                .doit = net_dm_cmd_trace,
 337        },
 338        {
 339                .cmd = NET_DM_CMD_STOP,
 340                .doit = net_dm_cmd_trace,
 341        },
 342};
 343
 344static struct notifier_block dropmon_net_notifier = {
 345        .notifier_call = dropmon_net_event
 346};
 347
 348static int __init init_net_drop_monitor(void)
 349{
 350        struct per_cpu_dm_data *data;
 351        int cpu, rc;
 352
 353        printk(KERN_INFO "Initializing network drop monitor service\n");
 354
 355        if (sizeof(void *) > 8) {
 356                printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n");
 357                return -ENOSPC;
 358        }
 359
 360        rc = genl_register_family_with_ops(&net_drop_monitor_family,
 361                                           dropmon_ops,
 362                                           ARRAY_SIZE(dropmon_ops));
 363        if (rc) {
 364                printk(KERN_ERR "Could not create drop monitor netlink family\n");
 365                return rc;
 366        }
 367
 368        rc = register_netdevice_notifier(&dropmon_net_notifier);
 369        if (rc < 0) {
 370                printk(KERN_CRIT "Failed to register netdevice notifier\n");
 371                goto out_unreg;
 372        }
 373
 374        rc = 0;
 375
 376        for_each_present_cpu(cpu) {
 377                data = &per_cpu(dm_cpu_data, cpu);
 378                reset_per_cpu_data(data);
 379                INIT_WORK(&data->dm_alert_work, send_dm_alert);
 380                init_timer(&data->send_timer);
 381                data->send_timer.data = cpu;
 382                data->send_timer.function = sched_send_work;
 383        }
 384
 385        goto out;
 386
 387out_unreg:
 388        genl_unregister_family(&net_drop_monitor_family);
 389out:
 390        return rc;
 391}
 392
 393late_initcall(init_net_drop_monitor);
 394