linux/net/ipv4/netfilter/ip_queue.c
<<
>>
Prefs
   1/*
   2 * This is a module which is used for queueing IPv4 packets and
   3 * communicating with userspace via netlink.
   4 *
   5 * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
   6 * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
   7 *
   8 * This program is free software; you can redistribute it and/or modify
   9 * it under the terms of the GNU General Public License version 2 as
  10 * published by the Free Software Foundation.
  11 */
  12#include <linux/module.h>
  13#include <linux/skbuff.h>
  14#include <linux/init.h>
  15#include <linux/ip.h>
  16#include <linux/notifier.h>
  17#include <linux/netdevice.h>
  18#include <linux/netfilter.h>
  19#include <linux/netfilter_ipv4/ip_queue.h>
  20#include <linux/netfilter_ipv4/ip_tables.h>
  21#include <linux/netlink.h>
  22#include <linux/spinlock.h>
  23#include <linux/sysctl.h>
  24#include <linux/proc_fs.h>
  25#include <linux/seq_file.h>
  26#include <linux/security.h>
  27#include <linux/net.h>
  28#include <linux/mutex.h>
  29#include <net/net_namespace.h>
  30#include <net/sock.h>
  31#include <net/route.h>
  32#include <net/netfilter/nf_queue.h>
  33#include <net/ip.h>
  34
  35#define IPQ_QMAX_DEFAULT 1024
  36#define IPQ_PROC_FS_NAME "ip_queue"
  37#define NET_IPQ_QMAX 2088
  38#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
  39
  40typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
  41
  42static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
  43static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
  44static DEFINE_RWLOCK(queue_lock);
  45static int peer_pid __read_mostly;
  46static unsigned int copy_range __read_mostly;
  47static unsigned int queue_total;
  48static unsigned int queue_dropped = 0;
  49static unsigned int queue_user_dropped = 0;
  50static struct sock *ipqnl __read_mostly;
  51static LIST_HEAD(queue_list);
  52static DEFINE_MUTEX(ipqnl_mutex);
  53
  54static inline void
  55__ipq_enqueue_entry(struct nf_queue_entry *entry)
  56{
  57       list_add_tail(&entry->list, &queue_list);
  58       queue_total++;
  59}
  60
  61static inline int
  62__ipq_set_mode(unsigned char mode, unsigned int range)
  63{
  64        int status = 0;
  65
  66        switch(mode) {
  67        case IPQ_COPY_NONE:
  68        case IPQ_COPY_META:
  69                copy_mode = mode;
  70                copy_range = 0;
  71                break;
  72
  73        case IPQ_COPY_PACKET:
  74                copy_mode = mode;
  75                copy_range = range;
  76                if (copy_range > 0xFFFF)
  77                        copy_range = 0xFFFF;
  78                break;
  79
  80        default:
  81                status = -EINVAL;
  82
  83        }
  84        return status;
  85}
  86
  87static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
  88
  89static inline void
  90__ipq_reset(void)
  91{
  92        peer_pid = 0;
  93        net_disable_timestamp();
  94        __ipq_set_mode(IPQ_COPY_NONE, 0);
  95        __ipq_flush(NULL, 0);
  96}
  97
  98static struct nf_queue_entry *
  99ipq_find_dequeue_entry(unsigned long id)
 100{
 101        struct nf_queue_entry *entry = NULL, *i;
 102
 103        write_lock_bh(&queue_lock);
 104
 105        list_for_each_entry(i, &queue_list, list) {
 106                if ((unsigned long)i == id) {
 107                        entry = i;
 108                        break;
 109                }
 110        }
 111
 112        if (entry) {
 113                list_del(&entry->list);
 114                queue_total--;
 115        }
 116
 117        write_unlock_bh(&queue_lock);
 118        return entry;
 119}
 120
 121static void
 122__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 123{
 124        struct nf_queue_entry *entry, *next;
 125
 126        list_for_each_entry_safe(entry, next, &queue_list, list) {
 127                if (!cmpfn || cmpfn(entry, data)) {
 128                        list_del(&entry->list);
 129                        queue_total--;
 130                        nf_reinject(entry, NF_DROP);
 131                }
 132        }
 133}
 134
 135static void
 136ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 137{
 138        write_lock_bh(&queue_lock);
 139        __ipq_flush(cmpfn, data);
 140        write_unlock_bh(&queue_lock);
 141}
 142
 143static struct sk_buff *
 144ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 145{
 146        sk_buff_data_t old_tail;
 147        size_t size = 0;
 148        size_t data_len = 0;
 149        struct sk_buff *skb;
 150        struct ipq_packet_msg *pmsg;
 151        struct nlmsghdr *nlh;
 152        struct timeval tv;
 153
 154        read_lock_bh(&queue_lock);
 155
 156        switch (copy_mode) {
 157        case IPQ_COPY_META:
 158        case IPQ_COPY_NONE:
 159                size = NLMSG_SPACE(sizeof(*pmsg));
 160                break;
 161
 162        case IPQ_COPY_PACKET:
 163                if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
 164                     entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
 165                    (*errp = skb_checksum_help(entry->skb))) {
 166                        read_unlock_bh(&queue_lock);
 167                        return NULL;
 168                }
 169                if (copy_range == 0 || copy_range > entry->skb->len)
 170                        data_len = entry->skb->len;
 171                else
 172                        data_len = copy_range;
 173
 174                size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
 175                break;
 176
 177        default:
 178                *errp = -EINVAL;
 179                read_unlock_bh(&queue_lock);
 180                return NULL;
 181        }
 182
 183        read_unlock_bh(&queue_lock);
 184
 185        skb = alloc_skb(size, GFP_ATOMIC);
 186        if (!skb)
 187                goto nlmsg_failure;
 188
 189        old_tail = skb->tail;
 190        nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
 191        pmsg = NLMSG_DATA(nlh);
 192        memset(pmsg, 0, sizeof(*pmsg));
 193
 194        pmsg->packet_id       = (unsigned long )entry;
 195        pmsg->data_len        = data_len;
 196        tv = ktime_to_timeval(entry->skb->tstamp);
 197        pmsg->timestamp_sec   = tv.tv_sec;
 198        pmsg->timestamp_usec  = tv.tv_usec;
 199        pmsg->mark            = entry->skb->mark;
 200        pmsg->hook            = entry->hook;
 201        pmsg->hw_protocol     = entry->skb->protocol;
 202
 203        if (entry->indev)
 204                strcpy(pmsg->indev_name, entry->indev->name);
 205        else
 206                pmsg->indev_name[0] = '\0';
 207
 208        if (entry->outdev)
 209                strcpy(pmsg->outdev_name, entry->outdev->name);
 210        else
 211                pmsg->outdev_name[0] = '\0';
 212
 213        if (entry->indev && entry->skb->dev) {
 214                pmsg->hw_type = entry->skb->dev->type;
 215                pmsg->hw_addrlen = dev_parse_header(entry->skb,
 216                                                    pmsg->hw_addr);
 217        }
 218
 219        if (data_len)
 220                if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
 221                        BUG();
 222
 223        nlh->nlmsg_len = skb->tail - old_tail;
 224        return skb;
 225
 226nlmsg_failure:
 227        *errp = -EINVAL;
 228        printk(KERN_ERR "ip_queue: error creating packet message\n");
 229        return NULL;
 230}
 231
 232static int
 233ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 234{
 235        int status = -EINVAL;
 236        struct sk_buff *nskb;
 237
 238        if (copy_mode == IPQ_COPY_NONE)
 239                return -EAGAIN;
 240
 241        nskb = ipq_build_packet_message(entry, &status);
 242        if (nskb == NULL)
 243                return status;
 244
 245        write_lock_bh(&queue_lock);
 246
 247        if (!peer_pid)
 248                goto err_out_free_nskb;
 249
 250        if (queue_total >= queue_maxlen) {
 251                queue_dropped++;
 252                status = -ENOSPC;
 253                if (net_ratelimit())
 254                          printk (KERN_WARNING "ip_queue: full at %d entries, "
 255                                  "dropping packets(s). Dropped: %d\n", queue_total,
 256                                  queue_dropped);
 257                goto err_out_free_nskb;
 258        }
 259
 260        /* netlink_unicast will either free the nskb or attach it to a socket */
 261        status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
 262        if (status < 0) {
 263                queue_user_dropped++;
 264                goto err_out_unlock;
 265        }
 266
 267        __ipq_enqueue_entry(entry);
 268
 269        write_unlock_bh(&queue_lock);
 270        return status;
 271
 272err_out_free_nskb:
 273        kfree_skb(nskb);
 274
 275err_out_unlock:
 276        write_unlock_bh(&queue_lock);
 277        return status;
 278}
 279
 280static int
 281ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
 282{
 283        int diff;
 284        struct iphdr *user_iph = (struct iphdr *)v->payload;
 285        struct sk_buff *nskb;
 286
 287        if (v->data_len < sizeof(*user_iph))
 288                return 0;
 289        diff = v->data_len - e->skb->len;
 290        if (diff < 0) {
 291                if (pskb_trim(e->skb, v->data_len))
 292                        return -ENOMEM;
 293        } else if (diff > 0) {
 294                if (v->data_len > 0xFFFF)
 295                        return -EINVAL;
 296                if (diff > skb_tailroom(e->skb)) {
 297                        nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
 298                                               diff, GFP_ATOMIC);
 299                        if (!nskb) {
 300                                printk(KERN_WARNING "ip_queue: error "
 301                                      "in mangle, dropping packet\n");
 302                                return -ENOMEM;
 303                        }
 304                        kfree_skb(e->skb);
 305                        e->skb = nskb;
 306                }
 307                skb_put(e->skb, diff);
 308        }
 309        if (!skb_make_writable(e->skb, v->data_len))
 310                return -ENOMEM;
 311        skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
 312        e->skb->ip_summed = CHECKSUM_NONE;
 313
 314        return 0;
 315}
 316
 317static int
 318ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
 319{
 320        struct nf_queue_entry *entry;
 321
 322        if (vmsg->value > NF_MAX_VERDICT)
 323                return -EINVAL;
 324
 325        entry = ipq_find_dequeue_entry(vmsg->id);
 326        if (entry == NULL)
 327                return -ENOENT;
 328        else {
 329                int verdict = vmsg->value;
 330
 331                if (vmsg->data_len && vmsg->data_len == len)
 332                        if (ipq_mangle_ipv4(vmsg, entry) < 0)
 333                                verdict = NF_DROP;
 334
 335                nf_reinject(entry, verdict);
 336                return 0;
 337        }
 338}
 339
 340static int
 341ipq_set_mode(unsigned char mode, unsigned int range)
 342{
 343        int status;
 344
 345        write_lock_bh(&queue_lock);
 346        status = __ipq_set_mode(mode, range);
 347        write_unlock_bh(&queue_lock);
 348        return status;
 349}
 350
 351static int
 352ipq_receive_peer(struct ipq_peer_msg *pmsg,
 353                 unsigned char type, unsigned int len)
 354{
 355        int status = 0;
 356
 357        if (len < sizeof(*pmsg))
 358                return -EINVAL;
 359
 360        switch (type) {
 361        case IPQM_MODE:
 362                status = ipq_set_mode(pmsg->msg.mode.value,
 363                                      pmsg->msg.mode.range);
 364                break;
 365
 366        case IPQM_VERDICT:
 367                if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
 368                        status = -EINVAL;
 369                else
 370                        status = ipq_set_verdict(&pmsg->msg.verdict,
 371                                                 len - sizeof(*pmsg));
 372                        break;
 373        default:
 374                status = -EINVAL;
 375        }
 376        return status;
 377}
 378
 379static int
 380dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
 381{
 382        if (entry->indev)
 383                if (entry->indev->ifindex == ifindex)
 384                        return 1;
 385        if (entry->outdev)
 386                if (entry->outdev->ifindex == ifindex)
 387                        return 1;
 388#ifdef CONFIG_BRIDGE_NETFILTER
 389        if (entry->skb->nf_bridge) {
 390                if (entry->skb->nf_bridge->physindev &&
 391                    entry->skb->nf_bridge->physindev->ifindex == ifindex)
 392                        return 1;
 393                if (entry->skb->nf_bridge->physoutdev &&
 394                    entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
 395                        return 1;
 396        }
 397#endif
 398        return 0;
 399}
 400
 401static void
 402ipq_dev_drop(int ifindex)
 403{
 404        ipq_flush(dev_cmp, ifindex);
 405}
 406
 407#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
 408
 409static inline void
 410__ipq_rcv_skb(struct sk_buff *skb)
 411{
 412        int status, type, pid, flags, nlmsglen, skblen;
 413        struct nlmsghdr *nlh;
 414
 415        skblen = skb->len;
 416        if (skblen < sizeof(*nlh))
 417                return;
 418
 419        nlh = nlmsg_hdr(skb);
 420        nlmsglen = nlh->nlmsg_len;
 421        if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
 422                return;
 423
 424        pid = nlh->nlmsg_pid;
 425        flags = nlh->nlmsg_flags;
 426
 427        if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
 428                RCV_SKB_FAIL(-EINVAL);
 429
 430        if (flags & MSG_TRUNC)
 431                RCV_SKB_FAIL(-ECOMM);
 432
 433        type = nlh->nlmsg_type;
 434        if (type < NLMSG_NOOP || type >= IPQM_MAX)
 435                RCV_SKB_FAIL(-EINVAL);
 436
 437        if (type <= IPQM_BASE)
 438                return;
 439
 440        if (security_netlink_recv(skb, CAP_NET_ADMIN))
 441                RCV_SKB_FAIL(-EPERM);
 442
 443        write_lock_bh(&queue_lock);
 444
 445        if (peer_pid) {
 446                if (peer_pid != pid) {
 447                        write_unlock_bh(&queue_lock);
 448                        RCV_SKB_FAIL(-EBUSY);
 449                }
 450        } else {
 451                net_enable_timestamp();
 452                peer_pid = pid;
 453        }
 454
 455        write_unlock_bh(&queue_lock);
 456
 457        status = ipq_receive_peer(NLMSG_DATA(nlh), type,
 458                                  nlmsglen - NLMSG_LENGTH(0));
 459        if (status < 0)
 460                RCV_SKB_FAIL(status);
 461
 462        if (flags & NLM_F_ACK)
 463                netlink_ack(skb, nlh, 0);
 464        return;
 465}
 466
 467static void
 468ipq_rcv_skb(struct sk_buff *skb)
 469{
 470        mutex_lock(&ipqnl_mutex);
 471        __ipq_rcv_skb(skb);
 472        mutex_unlock(&ipqnl_mutex);
 473}
 474
 475static int
 476ipq_rcv_dev_event(struct notifier_block *this,
 477                  unsigned long event, void *ptr)
 478{
 479        struct net_device *dev = ptr;
 480
 481        if (!net_eq(dev_net(dev), &init_net))
 482                return NOTIFY_DONE;
 483
 484        /* Drop any packets associated with the downed device */
 485        if (event == NETDEV_DOWN)
 486                ipq_dev_drop(dev->ifindex);
 487        return NOTIFY_DONE;
 488}
 489
 490static struct notifier_block ipq_dev_notifier = {
 491        .notifier_call  = ipq_rcv_dev_event,
 492};
 493
 494static int
 495ipq_rcv_nl_event(struct notifier_block *this,
 496                 unsigned long event, void *ptr)
 497{
 498        struct netlink_notify *n = ptr;
 499
 500        if (event == NETLINK_URELEASE &&
 501            n->protocol == NETLINK_FIREWALL && n->pid) {
 502                write_lock_bh(&queue_lock);
 503                if ((n->net == &init_net) && (n->pid == peer_pid))
 504                        __ipq_reset();
 505                write_unlock_bh(&queue_lock);
 506        }
 507        return NOTIFY_DONE;
 508}
 509
 510static struct notifier_block ipq_nl_notifier = {
 511        .notifier_call  = ipq_rcv_nl_event,
 512};
 513
 514#ifdef CONFIG_SYSCTL
 515static struct ctl_table_header *ipq_sysctl_header;
 516
 517static ctl_table ipq_table[] = {
 518        {
 519                .ctl_name       = NET_IPQ_QMAX,
 520                .procname       = NET_IPQ_QMAX_NAME,
 521                .data           = &queue_maxlen,
 522                .maxlen         = sizeof(queue_maxlen),
 523                .mode           = 0644,
 524                .proc_handler   = proc_dointvec
 525        },
 526        { .ctl_name = 0 }
 527};
 528#endif
 529
 530#ifdef CONFIG_PROC_FS
 531static int ip_queue_show(struct seq_file *m, void *v)
 532{
 533        read_lock_bh(&queue_lock);
 534
 535        seq_printf(m,
 536                      "Peer PID          : %d\n"
 537                      "Copy mode         : %hu\n"
 538                      "Copy range        : %u\n"
 539                      "Queue length      : %u\n"
 540                      "Queue max. length : %u\n"
 541                      "Queue dropped     : %u\n"
 542                      "Netlink dropped   : %u\n",
 543                      peer_pid,
 544                      copy_mode,
 545                      copy_range,
 546                      queue_total,
 547                      queue_maxlen,
 548                      queue_dropped,
 549                      queue_user_dropped);
 550
 551        read_unlock_bh(&queue_lock);
 552        return 0;
 553}
 554
 555static int ip_queue_open(struct inode *inode, struct file *file)
 556{
 557        return single_open(file, ip_queue_show, NULL);
 558}
 559
 560static const struct file_operations ip_queue_proc_fops = {
 561        .open           = ip_queue_open,
 562        .read           = seq_read,
 563        .llseek         = seq_lseek,
 564        .release        = single_release,
 565        .owner          = THIS_MODULE,
 566};
 567#endif
 568
 569static const struct nf_queue_handler nfqh = {
 570        .name   = "ip_queue",
 571        .outfn  = &ipq_enqueue_packet,
 572};
 573
 574static int __init ip_queue_init(void)
 575{
 576        int status = -ENOMEM;
 577        struct proc_dir_entry *proc __maybe_unused;
 578
 579        netlink_register_notifier(&ipq_nl_notifier);
 580        ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
 581                                      ipq_rcv_skb, NULL, THIS_MODULE);
 582        if (ipqnl == NULL) {
 583                printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
 584                goto cleanup_netlink_notifier;
 585        }
 586
 587#ifdef CONFIG_PROC_FS
 588        proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
 589                           &ip_queue_proc_fops);
 590        if (!proc) {
 591                printk(KERN_ERR "ip_queue: failed to create proc entry\n");
 592                goto cleanup_ipqnl;
 593        }
 594#endif
 595        register_netdevice_notifier(&ipq_dev_notifier);
 596#ifdef CONFIG_SYSCTL
 597        ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table);
 598#endif
 599        status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh);
 600        if (status < 0) {
 601                printk(KERN_ERR "ip_queue: failed to register queue handler\n");
 602                goto cleanup_sysctl;
 603        }
 604        return status;
 605
 606cleanup_sysctl:
 607#ifdef CONFIG_SYSCTL
 608        unregister_sysctl_table(ipq_sysctl_header);
 609#endif
 610        unregister_netdevice_notifier(&ipq_dev_notifier);
 611        proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
 612cleanup_ipqnl: __maybe_unused
 613        netlink_kernel_release(ipqnl);
 614        mutex_lock(&ipqnl_mutex);
 615        mutex_unlock(&ipqnl_mutex);
 616
 617cleanup_netlink_notifier:
 618        netlink_unregister_notifier(&ipq_nl_notifier);
 619        return status;
 620}
 621
 622static void __exit ip_queue_fini(void)
 623{
 624        nf_unregister_queue_handlers(&nfqh);
 625        synchronize_net();
 626        ipq_flush(NULL, 0);
 627
 628#ifdef CONFIG_SYSCTL
 629        unregister_sysctl_table(ipq_sysctl_header);
 630#endif
 631        unregister_netdevice_notifier(&ipq_dev_notifier);
 632        proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
 633
 634        netlink_kernel_release(ipqnl);
 635        mutex_lock(&ipqnl_mutex);
 636        mutex_unlock(&ipqnl_mutex);
 637
 638        netlink_unregister_notifier(&ipq_nl_notifier);
 639}
 640
 641MODULE_DESCRIPTION("IPv4 packet queue handler");
 642MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
 643MODULE_LICENSE("GPL");
 644MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL);
 645
 646module_init(ip_queue_init);
 647module_exit(ip_queue_fini);
 648