linux/net/ipv6/netfilter/ip6_queue.c
<<
>>
Prefs
   1/*
   2 * This is a module which is used for queueing IPv6 packets and
   3 * communicating with userspace via netlink.
   4 *
   5 * (C) 2001 Fernando Anton, this code is GPL.
   6 *     IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
   7 *     Universidad Carlos III de Madrid - Leganes (Madrid) - Spain
   8 *     Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain
   9 *     email: fanton@it.uc3m.es
  10 *
  11 * This program is free software; you can redistribute it and/or modify
  12 * it under the terms of the GNU General Public License version 2 as
  13 * published by the Free Software Foundation.
  14 */
  15#include <linux/module.h>
  16#include <linux/skbuff.h>
  17#include <linux/init.h>
  18#include <linux/ipv6.h>
  19#include <linux/notifier.h>
  20#include <linux/netdevice.h>
  21#include <linux/netfilter.h>
  22#include <linux/netlink.h>
  23#include <linux/spinlock.h>
  24#include <linux/sysctl.h>
  25#include <linux/proc_fs.h>
  26#include <linux/seq_file.h>
  27#include <linux/mutex.h>
  28#include <net/net_namespace.h>
  29#include <net/sock.h>
  30#include <net/ipv6.h>
  31#include <net/ip6_route.h>
  32#include <net/netfilter/nf_queue.h>
  33#include <linux/netfilter_ipv4/ip_queue.h>
  34#include <linux/netfilter_ipv4/ip_tables.h>
  35#include <linux/netfilter_ipv6/ip6_tables.h>
  36
  37#define IPQ_QMAX_DEFAULT 1024
  38#define IPQ_PROC_FS_NAME "ip6_queue"
  39#define NET_IPQ_QMAX 2088
  40#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
  41
  42typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
  43
  44static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
  45static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
  46static DEFINE_RWLOCK(queue_lock);
  47static int peer_pid __read_mostly;
  48static unsigned int copy_range __read_mostly;
  49static unsigned int queue_total;
  50static unsigned int queue_dropped = 0;
  51static unsigned int queue_user_dropped = 0;
  52static struct sock *ipqnl __read_mostly;
  53static LIST_HEAD(queue_list);
  54static DEFINE_MUTEX(ipqnl_mutex);
  55
  56static inline void
  57__ipq_enqueue_entry(struct nf_queue_entry *entry)
  58{
  59       list_add_tail(&entry->list, &queue_list);
  60       queue_total++;
  61}
  62
  63static inline int
  64__ipq_set_mode(unsigned char mode, unsigned int range)
  65{
  66        int status = 0;
  67
  68        switch(mode) {
  69        case IPQ_COPY_NONE:
  70        case IPQ_COPY_META:
  71                copy_mode = mode;
  72                copy_range = 0;
  73                break;
  74
  75        case IPQ_COPY_PACKET:
  76                copy_mode = mode;
  77                copy_range = range;
  78                if (copy_range > 0xFFFF)
  79                        copy_range = 0xFFFF;
  80                break;
  81
  82        default:
  83                status = -EINVAL;
  84
  85        }
  86        return status;
  87}
  88
  89static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
  90
  91static inline void
  92__ipq_reset(void)
  93{
  94        peer_pid = 0;
  95        net_disable_timestamp();
  96        __ipq_set_mode(IPQ_COPY_NONE, 0);
  97        __ipq_flush(NULL, 0);
  98}
  99
 100static struct nf_queue_entry *
 101ipq_find_dequeue_entry(unsigned long id)
 102{
 103        struct nf_queue_entry *entry = NULL, *i;
 104
 105        write_lock_bh(&queue_lock);
 106
 107        list_for_each_entry(i, &queue_list, list) {
 108                if ((unsigned long)i == id) {
 109                        entry = i;
 110                        break;
 111                }
 112        }
 113
 114        if (entry) {
 115                list_del(&entry->list);
 116                queue_total--;
 117        }
 118
 119        write_unlock_bh(&queue_lock);
 120        return entry;
 121}
 122
 123static void
 124__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 125{
 126        struct nf_queue_entry *entry, *next;
 127
 128        list_for_each_entry_safe(entry, next, &queue_list, list) {
 129                if (!cmpfn || cmpfn(entry, data)) {
 130                        list_del(&entry->list);
 131                        queue_total--;
 132                        nf_reinject(entry, NF_DROP);
 133                }
 134        }
 135}
 136
 137static void
 138ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 139{
 140        write_lock_bh(&queue_lock);
 141        __ipq_flush(cmpfn, data);
 142        write_unlock_bh(&queue_lock);
 143}
 144
 145static struct sk_buff *
 146ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 147{
 148        sk_buff_data_t old_tail;
 149        size_t size = 0;
 150        size_t data_len = 0;
 151        struct sk_buff *skb;
 152        struct ipq_packet_msg *pmsg;
 153        struct nlmsghdr *nlh;
 154        struct timeval tv;
 155
 156        read_lock_bh(&queue_lock);
 157
 158        switch (copy_mode) {
 159        case IPQ_COPY_META:
 160        case IPQ_COPY_NONE:
 161                size = NLMSG_SPACE(sizeof(*pmsg));
 162                break;
 163
 164        case IPQ_COPY_PACKET:
 165                if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
 166                     entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
 167                    (*errp = skb_checksum_help(entry->skb))) {
 168                        read_unlock_bh(&queue_lock);
 169                        return NULL;
 170                }
 171                if (copy_range == 0 || copy_range > entry->skb->len)
 172                        data_len = entry->skb->len;
 173                else
 174                        data_len = copy_range;
 175
 176                size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
 177                break;
 178
 179        default:
 180                *errp = -EINVAL;
 181                read_unlock_bh(&queue_lock);
 182                return NULL;
 183        }
 184
 185        read_unlock_bh(&queue_lock);
 186
 187        skb = alloc_skb(size, GFP_ATOMIC);
 188        if (!skb)
 189                goto nlmsg_failure;
 190
 191        old_tail = skb->tail;
 192        nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
 193        pmsg = NLMSG_DATA(nlh);
 194        memset(pmsg, 0, sizeof(*pmsg));
 195
 196        pmsg->packet_id       = (unsigned long )entry;
 197        pmsg->data_len        = data_len;
 198        tv = ktime_to_timeval(entry->skb->tstamp);
 199        pmsg->timestamp_sec   = tv.tv_sec;
 200        pmsg->timestamp_usec  = tv.tv_usec;
 201        pmsg->mark            = entry->skb->mark;
 202        pmsg->hook            = entry->hook;
 203        pmsg->hw_protocol     = entry->skb->protocol;
 204
 205        if (entry->indev)
 206                strcpy(pmsg->indev_name, entry->indev->name);
 207        else
 208                pmsg->indev_name[0] = '\0';
 209
 210        if (entry->outdev)
 211                strcpy(pmsg->outdev_name, entry->outdev->name);
 212        else
 213                pmsg->outdev_name[0] = '\0';
 214
 215        if (entry->indev && entry->skb->dev) {
 216                pmsg->hw_type = entry->skb->dev->type;
 217                pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr);
 218        }
 219
 220        if (data_len)
 221                if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
 222                        BUG();
 223
 224        nlh->nlmsg_len = skb->tail - old_tail;
 225        return skb;
 226
 227nlmsg_failure:
 228        *errp = -EINVAL;
 229        printk(KERN_ERR "ip6_queue: error creating packet message\n");
 230        return NULL;
 231}
 232
 233static int
 234ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 235{
 236        int status = -EINVAL;
 237        struct sk_buff *nskb;
 238
 239        if (copy_mode == IPQ_COPY_NONE)
 240                return -EAGAIN;
 241
 242        nskb = ipq_build_packet_message(entry, &status);
 243        if (nskb == NULL)
 244                return status;
 245
 246        write_lock_bh(&queue_lock);
 247
 248        if (!peer_pid)
 249                goto err_out_free_nskb;
 250
 251        if (queue_total >= queue_maxlen) {
 252                queue_dropped++;
 253                status = -ENOSPC;
 254                if (net_ratelimit())
 255                        printk (KERN_WARNING "ip6_queue: fill at %d entries, "
 256                                "dropping packet(s).  Dropped: %d\n", queue_total,
 257                                queue_dropped);
 258                goto err_out_free_nskb;
 259        }
 260
 261        /* netlink_unicast will either free the nskb or attach it to a socket */
 262        status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
 263        if (status < 0) {
 264                queue_user_dropped++;
 265                goto err_out_unlock;
 266        }
 267
 268        __ipq_enqueue_entry(entry);
 269
 270        write_unlock_bh(&queue_lock);
 271        return status;
 272
 273err_out_free_nskb:
 274        kfree_skb(nskb);
 275
 276err_out_unlock:
 277        write_unlock_bh(&queue_lock);
 278        return status;
 279}
 280
 281static int
 282ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
 283{
 284        int diff;
 285        struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
 286        struct sk_buff *nskb;
 287
 288        if (v->data_len < sizeof(*user_iph))
 289                return 0;
 290        diff = v->data_len - e->skb->len;
 291        if (diff < 0) {
 292                if (pskb_trim(e->skb, v->data_len))
 293                        return -ENOMEM;
 294        } else if (diff > 0) {
 295                if (v->data_len > 0xFFFF)
 296                        return -EINVAL;
 297                if (diff > skb_tailroom(e->skb)) {
 298                        nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
 299                                               diff, GFP_ATOMIC);
 300                        if (!nskb) {
 301                                printk(KERN_WARNING "ip6_queue: OOM "
 302                                      "in mangle, dropping packet\n");
 303                                return -ENOMEM;
 304                        }
 305                        kfree_skb(e->skb);
 306                        e->skb = nskb;
 307                }
 308                skb_put(e->skb, diff);
 309        }
 310        if (!skb_make_writable(e->skb, v->data_len))
 311                return -ENOMEM;
 312        skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
 313        e->skb->ip_summed = CHECKSUM_NONE;
 314
 315        return 0;
 316}
 317
 318static int
 319ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
 320{
 321        struct nf_queue_entry *entry;
 322
 323        if (vmsg->value > NF_MAX_VERDICT)
 324                return -EINVAL;
 325
 326        entry = ipq_find_dequeue_entry(vmsg->id);
 327        if (entry == NULL)
 328                return -ENOENT;
 329        else {
 330                int verdict = vmsg->value;
 331
 332                if (vmsg->data_len && vmsg->data_len == len)
 333                        if (ipq_mangle_ipv6(vmsg, entry) < 0)
 334                                verdict = NF_DROP;
 335
 336                nf_reinject(entry, verdict);
 337                return 0;
 338        }
 339}
 340
 341static int
 342ipq_set_mode(unsigned char mode, unsigned int range)
 343{
 344        int status;
 345
 346        write_lock_bh(&queue_lock);
 347        status = __ipq_set_mode(mode, range);
 348        write_unlock_bh(&queue_lock);
 349        return status;
 350}
 351
 352static int
 353ipq_receive_peer(struct ipq_peer_msg *pmsg,
 354                 unsigned char type, unsigned int len)
 355{
 356        int status = 0;
 357
 358        if (len < sizeof(*pmsg))
 359                return -EINVAL;
 360
 361        switch (type) {
 362        case IPQM_MODE:
 363                status = ipq_set_mode(pmsg->msg.mode.value,
 364                                      pmsg->msg.mode.range);
 365                break;
 366
 367        case IPQM_VERDICT:
 368                if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
 369                        status = -EINVAL;
 370                else
 371                        status = ipq_set_verdict(&pmsg->msg.verdict,
 372                                                 len - sizeof(*pmsg));
 373                        break;
 374        default:
 375                status = -EINVAL;
 376        }
 377        return status;
 378}
 379
 380static int
 381dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
 382{
 383        if (entry->indev)
 384                if (entry->indev->ifindex == ifindex)
 385                        return 1;
 386
 387        if (entry->outdev)
 388                if (entry->outdev->ifindex == ifindex)
 389                        return 1;
 390#ifdef CONFIG_BRIDGE_NETFILTER
 391        if (entry->skb->nf_bridge) {
 392                if (entry->skb->nf_bridge->physindev &&
 393                    entry->skb->nf_bridge->physindev->ifindex == ifindex)
 394                        return 1;
 395                if (entry->skb->nf_bridge->physoutdev &&
 396                    entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
 397                        return 1;
 398        }
 399#endif
 400        return 0;
 401}
 402
 403static void
 404ipq_dev_drop(int ifindex)
 405{
 406        ipq_flush(dev_cmp, ifindex);
 407}
 408
 409#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
 410
 411static inline void
 412__ipq_rcv_skb(struct sk_buff *skb)
 413{
 414        int status, type, pid, flags, nlmsglen, skblen;
 415        struct nlmsghdr *nlh;
 416
 417        skblen = skb->len;
 418        if (skblen < sizeof(*nlh))
 419                return;
 420
 421        nlh = nlmsg_hdr(skb);
 422        nlmsglen = nlh->nlmsg_len;
 423        if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
 424                return;
 425
 426        pid = nlh->nlmsg_pid;
 427        flags = nlh->nlmsg_flags;
 428
 429        if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
 430                RCV_SKB_FAIL(-EINVAL);
 431
 432        if (flags & MSG_TRUNC)
 433                RCV_SKB_FAIL(-ECOMM);
 434
 435        type = nlh->nlmsg_type;
 436        if (type < NLMSG_NOOP || type >= IPQM_MAX)
 437                RCV_SKB_FAIL(-EINVAL);
 438
 439        if (type <= IPQM_BASE)
 440                return;
 441
 442        if (security_netlink_recv(skb, CAP_NET_ADMIN))
 443                RCV_SKB_FAIL(-EPERM);
 444
 445        write_lock_bh(&queue_lock);
 446
 447        if (peer_pid) {
 448                if (peer_pid != pid) {
 449                        write_unlock_bh(&queue_lock);
 450                        RCV_SKB_FAIL(-EBUSY);
 451                }
 452        } else {
 453                net_enable_timestamp();
 454                peer_pid = pid;
 455        }
 456
 457        write_unlock_bh(&queue_lock);
 458
 459        status = ipq_receive_peer(NLMSG_DATA(nlh), type,
 460                                  nlmsglen - NLMSG_LENGTH(0));
 461        if (status < 0)
 462                RCV_SKB_FAIL(status);
 463
 464        if (flags & NLM_F_ACK)
 465                netlink_ack(skb, nlh, 0);
 466        return;
 467}
 468
 469static void
 470ipq_rcv_skb(struct sk_buff *skb)
 471{
 472        mutex_lock(&ipqnl_mutex);
 473        __ipq_rcv_skb(skb);
 474        mutex_unlock(&ipqnl_mutex);
 475}
 476
 477static int
 478ipq_rcv_dev_event(struct notifier_block *this,
 479                  unsigned long event, void *ptr)
 480{
 481        struct net_device *dev = ptr;
 482
 483        if (!net_eq(dev_net(dev), &init_net))
 484                return NOTIFY_DONE;
 485
 486        /* Drop any packets associated with the downed device */
 487        if (event == NETDEV_DOWN)
 488                ipq_dev_drop(dev->ifindex);
 489        return NOTIFY_DONE;
 490}
 491
 492static struct notifier_block ipq_dev_notifier = {
 493        .notifier_call  = ipq_rcv_dev_event,
 494};
 495
 496static int
 497ipq_rcv_nl_event(struct notifier_block *this,
 498                 unsigned long event, void *ptr)
 499{
 500        struct netlink_notify *n = ptr;
 501
 502        if (event == NETLINK_URELEASE &&
 503            n->protocol == NETLINK_IP6_FW && n->pid) {
 504                write_lock_bh(&queue_lock);
 505                if ((n->net == &init_net) && (n->pid == peer_pid))
 506                        __ipq_reset();
 507                write_unlock_bh(&queue_lock);
 508        }
 509        return NOTIFY_DONE;
 510}
 511
 512static struct notifier_block ipq_nl_notifier = {
 513        .notifier_call  = ipq_rcv_nl_event,
 514};
 515
 516#ifdef CONFIG_SYSCTL
 517static struct ctl_table_header *ipq_sysctl_header;
 518
 519static ctl_table ipq_table[] = {
 520        {
 521                .ctl_name       = NET_IPQ_QMAX,
 522                .procname       = NET_IPQ_QMAX_NAME,
 523                .data           = &queue_maxlen,
 524                .maxlen         = sizeof(queue_maxlen),
 525                .mode           = 0644,
 526                .proc_handler   = proc_dointvec
 527        },
 528        { .ctl_name = 0 }
 529};
 530#endif
 531
 532#ifdef CONFIG_PROC_FS
 533static int ip6_queue_show(struct seq_file *m, void *v)
 534{
 535        read_lock_bh(&queue_lock);
 536
 537        seq_printf(m,
 538                      "Peer PID          : %d\n"
 539                      "Copy mode         : %hu\n"
 540                      "Copy range        : %u\n"
 541                      "Queue length      : %u\n"
 542                      "Queue max. length : %u\n"
 543                      "Queue dropped     : %u\n"
 544                      "Netfilter dropped : %u\n",
 545                      peer_pid,
 546                      copy_mode,
 547                      copy_range,
 548                      queue_total,
 549                      queue_maxlen,
 550                      queue_dropped,
 551                      queue_user_dropped);
 552
 553        read_unlock_bh(&queue_lock);
 554        return 0;
 555}
 556
 557static int ip6_queue_open(struct inode *inode, struct file *file)
 558{
 559        return single_open(file, ip6_queue_show, NULL);
 560}
 561
 562static const struct file_operations ip6_queue_proc_fops = {
 563        .open           = ip6_queue_open,
 564        .read           = seq_read,
 565        .llseek         = seq_lseek,
 566        .release        = single_release,
 567        .owner          = THIS_MODULE,
 568};
 569#endif
 570
 571static const struct nf_queue_handler nfqh = {
 572        .name   = "ip6_queue",
 573        .outfn  = &ipq_enqueue_packet,
 574};
 575
 576static int __init ip6_queue_init(void)
 577{
 578        int status = -ENOMEM;
 579        struct proc_dir_entry *proc __maybe_unused;
 580
 581        netlink_register_notifier(&ipq_nl_notifier);
 582        ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0,
 583                                      ipq_rcv_skb, NULL, THIS_MODULE);
 584        if (ipqnl == NULL) {
 585                printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
 586                goto cleanup_netlink_notifier;
 587        }
 588
 589#ifdef CONFIG_PROC_FS
 590        proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
 591                           &ip6_queue_proc_fops);
 592        if (!proc) {
 593                printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
 594                goto cleanup_ipqnl;
 595        }
 596#endif
 597        register_netdevice_notifier(&ipq_dev_notifier);
 598#ifdef CONFIG_SYSCTL
 599        ipq_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, ipq_table);
 600#endif
 601        status = nf_register_queue_handler(NFPROTO_IPV6, &nfqh);
 602        if (status < 0) {
 603                printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
 604                goto cleanup_sysctl;
 605        }
 606        return status;
 607
 608cleanup_sysctl:
 609#ifdef CONFIG_SYSCTL
 610        unregister_sysctl_table(ipq_sysctl_header);
 611#endif
 612        unregister_netdevice_notifier(&ipq_dev_notifier);
 613        proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
 614
 615cleanup_ipqnl: __maybe_unused
 616        netlink_kernel_release(ipqnl);
 617        mutex_lock(&ipqnl_mutex);
 618        mutex_unlock(&ipqnl_mutex);
 619
 620cleanup_netlink_notifier:
 621        netlink_unregister_notifier(&ipq_nl_notifier);
 622        return status;
 623}
 624
 625static void __exit ip6_queue_fini(void)
 626{
 627        nf_unregister_queue_handlers(&nfqh);
 628        synchronize_net();
 629        ipq_flush(NULL, 0);
 630
 631#ifdef CONFIG_SYSCTL
 632        unregister_sysctl_table(ipq_sysctl_header);
 633#endif
 634        unregister_netdevice_notifier(&ipq_dev_notifier);
 635        proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
 636
 637        netlink_kernel_release(ipqnl);
 638        mutex_lock(&ipqnl_mutex);
 639        mutex_unlock(&ipqnl_mutex);
 640
 641        netlink_unregister_notifier(&ipq_nl_notifier);
 642}
 643
 644MODULE_DESCRIPTION("IPv6 packet queue handler");
 645MODULE_LICENSE("GPL");
 646MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_IP6_FW);
 647
 648module_init(ip6_queue_init);
 649module_exit(ip6_queue_fini);
 650