linux/net/netfilter/xt_hashlimit.c
<<
>>
Prefs
   1/*
   2 *      xt_hashlimit - Netfilter module to limit the number of packets per time
   3 *      separately for each hashbucket (sourceip/sourceport/dstip/dstport)
   4 *
   5 *      (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
   6 *      Copyright © CC Computer Consultants GmbH, 2007 - 2008
   7 *
   8 * Development of this code was funded by Astaro AG, http://www.astaro.com/
   9 */
  10#include <linux/module.h>
  11#include <linux/spinlock.h>
  12#include <linux/random.h>
  13#include <linux/jhash.h>
  14#include <linux/slab.h>
  15#include <linux/vmalloc.h>
  16#include <linux/proc_fs.h>
  17#include <linux/seq_file.h>
  18#include <linux/list.h>
  19#include <linux/skbuff.h>
  20#include <linux/mm.h>
  21#include <linux/in.h>
  22#include <linux/ip.h>
  23#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
  24#include <linux/ipv6.h>
  25#include <net/ipv6.h>
  26#endif
  27
  28#include <net/net_namespace.h>
  29
  30#include <linux/netfilter/x_tables.h>
  31#include <linux/netfilter_ipv4/ip_tables.h>
  32#include <linux/netfilter_ipv6/ip6_tables.h>
  33#include <linux/netfilter/xt_hashlimit.h>
  34#include <linux/mutex.h>
  35
  36MODULE_LICENSE("GPL");
  37MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
  38MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
  39MODULE_DESCRIPTION("Xtables: per hash-bucket rate-limit match");
  40MODULE_ALIAS("ipt_hashlimit");
  41MODULE_ALIAS("ip6t_hashlimit");
  42
  43/* need to declare this at the top */
  44static struct proc_dir_entry *hashlimit_procdir4;
  45static struct proc_dir_entry *hashlimit_procdir6;
  46static const struct file_operations dl_file_ops;
  47
  48/* hash table crap */
  49struct dsthash_dst {
  50        union {
  51                struct {
  52                        __be32 src;
  53                        __be32 dst;
  54                } ip;
  55#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
  56                struct {
  57                        __be32 src[4];
  58                        __be32 dst[4];
  59                } ip6;
  60#endif
  61        };
  62        __be16 src_port;
  63        __be16 dst_port;
  64};
  65
  66struct dsthash_ent {
  67        /* static / read-only parts in the beginning */
  68        struct hlist_node node;
  69        struct dsthash_dst dst;
  70
  71        /* modified structure members in the end */
  72        unsigned long expires;          /* precalculated expiry time */
  73        struct {
  74                unsigned long prev;     /* last modification */
  75                u_int32_t credit;
  76                u_int32_t credit_cap, cost;
  77        } rateinfo;
  78};
  79
  80struct xt_hashlimit_htable {
  81        struct hlist_node node;         /* global list of all htables */
  82        atomic_t use;
  83        u_int8_t family;
  84
  85        struct hashlimit_cfg1 cfg;      /* config */
  86
  87        /* used internally */
  88        spinlock_t lock;                /* lock for list_head */
  89        u_int32_t rnd;                  /* random seed for hash */
  90        int rnd_initialized;
  91        unsigned int count;             /* number entries in table */
  92        struct timer_list timer;        /* timer for gc */
  93
  94        /* seq_file stuff */
  95        struct proc_dir_entry *pde;
  96
  97        struct hlist_head hash[0];      /* hashtable itself */
  98};
  99
 100static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */
 101static DEFINE_MUTEX(hlimit_mutex);      /* additional checkentry protection */
 102static HLIST_HEAD(hashlimit_htables);
 103static struct kmem_cache *hashlimit_cachep __read_mostly;
 104
 105static inline bool dst_cmp(const struct dsthash_ent *ent,
 106                           const struct dsthash_dst *b)
 107{
 108        return !memcmp(&ent->dst, b, sizeof(ent->dst));
 109}
 110
 111static u_int32_t
 112hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst)
 113{
 114        u_int32_t hash = jhash2((const u32 *)dst,
 115                                sizeof(*dst)/sizeof(u32),
 116                                ht->rnd);
 117        /*
 118         * Instead of returning hash % ht->cfg.size (implying a divide)
 119         * we return the high 32 bits of the (hash * ht->cfg.size) that will
 120         * give results between [0 and cfg.size-1] and same hash distribution,
 121         * but using a multiply, less expensive than a divide
 122         */
 123        return ((u64)hash * ht->cfg.size) >> 32;
 124}
 125
 126static struct dsthash_ent *
 127dsthash_find(const struct xt_hashlimit_htable *ht,
 128             const struct dsthash_dst *dst)
 129{
 130        struct dsthash_ent *ent;
 131        struct hlist_node *pos;
 132        u_int32_t hash = hash_dst(ht, dst);
 133
 134        if (!hlist_empty(&ht->hash[hash])) {
 135                hlist_for_each_entry(ent, pos, &ht->hash[hash], node)
 136                        if (dst_cmp(ent, dst))
 137                                return ent;
 138        }
 139        return NULL;
 140}
 141
 142/* allocate dsthash_ent, initialize dst, put in htable and lock it */
 143static struct dsthash_ent *
 144dsthash_alloc_init(struct xt_hashlimit_htable *ht,
 145                   const struct dsthash_dst *dst)
 146{
 147        struct dsthash_ent *ent;
 148
 149        /* initialize hash with random val at the time we allocate
 150         * the first hashtable entry */
 151        if (!ht->rnd_initialized) {
 152                get_random_bytes(&ht->rnd, sizeof(ht->rnd));
 153                ht->rnd_initialized = 1;
 154        }
 155
 156        if (ht->cfg.max && ht->count >= ht->cfg.max) {
 157                /* FIXME: do something. question is what.. */
 158                if (net_ratelimit())
 159                        printk(KERN_WARNING
 160                                "xt_hashlimit: max count of %u reached\n",
 161                                ht->cfg.max);
 162                return NULL;
 163        }
 164
 165        ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC);
 166        if (!ent) {
 167                if (net_ratelimit())
 168                        printk(KERN_ERR
 169                                "xt_hashlimit: can't allocate dsthash_ent\n");
 170                return NULL;
 171        }
 172        memcpy(&ent->dst, dst, sizeof(ent->dst));
 173
 174        hlist_add_head(&ent->node, &ht->hash[hash_dst(ht, dst)]);
 175        ht->count++;
 176        return ent;
 177}
 178
 179static inline void
 180dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
 181{
 182        hlist_del(&ent->node);
 183        kmem_cache_free(hashlimit_cachep, ent);
 184        ht->count--;
 185}
 186static void htable_gc(unsigned long htlong);
 187
 188static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family)
 189{
 190        struct xt_hashlimit_htable *hinfo;
 191        unsigned int size;
 192        unsigned int i;
 193
 194        if (minfo->cfg.size)
 195                size = minfo->cfg.size;
 196        else {
 197                size = ((totalram_pages << PAGE_SHIFT) / 16384) /
 198                       sizeof(struct list_head);
 199                if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
 200                        size = 8192;
 201                if (size < 16)
 202                        size = 16;
 203        }
 204        /* FIXME: don't use vmalloc() here or anywhere else -HW */
 205        hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
 206                        sizeof(struct list_head) * size);
 207        if (!hinfo) {
 208                printk(KERN_ERR "xt_hashlimit: unable to create hashtable\n");
 209                return -1;
 210        }
 211        minfo->hinfo = hinfo;
 212
 213        /* copy match config into hashtable config */
 214        hinfo->cfg.mode        = minfo->cfg.mode;
 215        hinfo->cfg.avg         = minfo->cfg.avg;
 216        hinfo->cfg.burst       = minfo->cfg.burst;
 217        hinfo->cfg.max         = minfo->cfg.max;
 218        hinfo->cfg.gc_interval = minfo->cfg.gc_interval;
 219        hinfo->cfg.expire      = minfo->cfg.expire;
 220
 221        if (family == NFPROTO_IPV4)
 222                hinfo->cfg.srcmask = hinfo->cfg.dstmask = 32;
 223        else
 224                hinfo->cfg.srcmask = hinfo->cfg.dstmask = 128;
 225
 226        hinfo->cfg.size = size;
 227        if (!hinfo->cfg.max)
 228                hinfo->cfg.max = 8 * hinfo->cfg.size;
 229        else if (hinfo->cfg.max < hinfo->cfg.size)
 230                hinfo->cfg.max = hinfo->cfg.size;
 231
 232        for (i = 0; i < hinfo->cfg.size; i++)
 233                INIT_HLIST_HEAD(&hinfo->hash[i]);
 234
 235        atomic_set(&hinfo->use, 1);
 236        hinfo->count = 0;
 237        hinfo->family = family;
 238        hinfo->rnd_initialized = 0;
 239        spin_lock_init(&hinfo->lock);
 240        hinfo->pde = proc_create_data(minfo->name, 0,
 241                (family == NFPROTO_IPV4) ?
 242                hashlimit_procdir4 : hashlimit_procdir6,
 243                &dl_file_ops, hinfo);
 244        if (!hinfo->pde) {
 245                vfree(hinfo);
 246                return -1;
 247        }
 248
 249        setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo);
 250        hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
 251        add_timer(&hinfo->timer);
 252
 253        spin_lock_bh(&hashlimit_lock);
 254        hlist_add_head(&hinfo->node, &hashlimit_htables);
 255        spin_unlock_bh(&hashlimit_lock);
 256
 257        return 0;
 258}
 259
 260static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family)
 261{
 262        struct xt_hashlimit_htable *hinfo;
 263        unsigned int size;
 264        unsigned int i;
 265
 266        if (minfo->cfg.size) {
 267                size = minfo->cfg.size;
 268        } else {
 269                size = (totalram_pages << PAGE_SHIFT) / 16384 /
 270                       sizeof(struct list_head);
 271                if (totalram_pages > 1024 * 1024 * 1024 / PAGE_SIZE)
 272                        size = 8192;
 273                if (size < 16)
 274                        size = 16;
 275        }
 276        /* FIXME: don't use vmalloc() here or anywhere else -HW */
 277        hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
 278                        sizeof(struct list_head) * size);
 279        if (hinfo == NULL) {
 280                printk(KERN_ERR "xt_hashlimit: unable to create hashtable\n");
 281                return -1;
 282        }
 283        minfo->hinfo = hinfo;
 284
 285        /* copy match config into hashtable config */
 286        memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg));
 287        hinfo->cfg.size = size;
 288        if (hinfo->cfg.max == 0)
 289                hinfo->cfg.max = 8 * hinfo->cfg.size;
 290        else if (hinfo->cfg.max < hinfo->cfg.size)
 291                hinfo->cfg.max = hinfo->cfg.size;
 292
 293        for (i = 0; i < hinfo->cfg.size; i++)
 294                INIT_HLIST_HEAD(&hinfo->hash[i]);
 295
 296        atomic_set(&hinfo->use, 1);
 297        hinfo->count = 0;
 298        hinfo->family = family;
 299        hinfo->rnd_initialized = 0;
 300        spin_lock_init(&hinfo->lock);
 301
 302        hinfo->pde = proc_create_data(minfo->name, 0,
 303                (family == NFPROTO_IPV4) ?
 304                hashlimit_procdir4 : hashlimit_procdir6,
 305                &dl_file_ops, hinfo);
 306        if (hinfo->pde == NULL) {
 307                vfree(hinfo);
 308                return -1;
 309        }
 310
 311        setup_timer(&hinfo->timer, htable_gc, (unsigned long)hinfo);
 312        hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
 313        add_timer(&hinfo->timer);
 314
 315        spin_lock_bh(&hashlimit_lock);
 316        hlist_add_head(&hinfo->node, &hashlimit_htables);
 317        spin_unlock_bh(&hashlimit_lock);
 318
 319        return 0;
 320}
 321
 322static bool select_all(const struct xt_hashlimit_htable *ht,
 323                       const struct dsthash_ent *he)
 324{
 325        return 1;
 326}
 327
 328static bool select_gc(const struct xt_hashlimit_htable *ht,
 329                      const struct dsthash_ent *he)
 330{
 331        return time_after_eq(jiffies, he->expires);
 332}
 333
 334static void htable_selective_cleanup(struct xt_hashlimit_htable *ht,
 335                        bool (*select)(const struct xt_hashlimit_htable *ht,
 336                                      const struct dsthash_ent *he))
 337{
 338        unsigned int i;
 339
 340        /* lock hash table and iterate over it */
 341        spin_lock_bh(&ht->lock);
 342        for (i = 0; i < ht->cfg.size; i++) {
 343                struct dsthash_ent *dh;
 344                struct hlist_node *pos, *n;
 345                hlist_for_each_entry_safe(dh, pos, n, &ht->hash[i], node) {
 346                        if ((*select)(ht, dh))
 347                                dsthash_free(ht, dh);
 348                }
 349        }
 350        spin_unlock_bh(&ht->lock);
 351}
 352
 353/* hash table garbage collector, run by timer */
 354static void htable_gc(unsigned long htlong)
 355{
 356        struct xt_hashlimit_htable *ht = (struct xt_hashlimit_htable *)htlong;
 357
 358        htable_selective_cleanup(ht, select_gc);
 359
 360        /* re-add the timer accordingly */
 361        ht->timer.expires = jiffies + msecs_to_jiffies(ht->cfg.gc_interval);
 362        add_timer(&ht->timer);
 363}
 364
 365static void htable_destroy(struct xt_hashlimit_htable *hinfo)
 366{
 367        del_timer_sync(&hinfo->timer);
 368
 369        /* remove proc entry */
 370        remove_proc_entry(hinfo->pde->name,
 371                          hinfo->family == NFPROTO_IPV4 ? hashlimit_procdir4 :
 372                                                     hashlimit_procdir6);
 373        htable_selective_cleanup(hinfo, select_all);
 374        vfree(hinfo);
 375}
 376
 377static struct xt_hashlimit_htable *htable_find_get(const char *name,
 378                                                   u_int8_t family)
 379{
 380        struct xt_hashlimit_htable *hinfo;
 381        struct hlist_node *pos;
 382
 383        spin_lock_bh(&hashlimit_lock);
 384        hlist_for_each_entry(hinfo, pos, &hashlimit_htables, node) {
 385                if (!strcmp(name, hinfo->pde->name) &&
 386                    hinfo->family == family) {
 387                        atomic_inc(&hinfo->use);
 388                        spin_unlock_bh(&hashlimit_lock);
 389                        return hinfo;
 390                }
 391        }
 392        spin_unlock_bh(&hashlimit_lock);
 393        return NULL;
 394}
 395
 396static void htable_put(struct xt_hashlimit_htable *hinfo)
 397{
 398        if (atomic_dec_and_test(&hinfo->use)) {
 399                spin_lock_bh(&hashlimit_lock);
 400                hlist_del(&hinfo->node);
 401                spin_unlock_bh(&hashlimit_lock);
 402                htable_destroy(hinfo);
 403        }
 404}
 405
 406/* The algorithm used is the Simple Token Bucket Filter (TBF)
 407 * see net/sched/sch_tbf.c in the linux source tree
 408 */
 409
 410/* Rusty: This is my (non-mathematically-inclined) understanding of
 411   this algorithm.  The `average rate' in jiffies becomes your initial
 412   amount of credit `credit' and the most credit you can ever have
 413   `credit_cap'.  The `peak rate' becomes the cost of passing the
 414   test, `cost'.
 415
 416   `prev' tracks the last packet hit: you gain one credit per jiffy.
 417   If you get credit balance more than this, the extra credit is
 418   discarded.  Every time the match passes, you lose `cost' credits;
 419   if you don't have that many, the test fails.
 420
 421   See Alexey's formal explanation in net/sched/sch_tbf.c.
 422
 423   To get the maximum range, we multiply by this factor (ie. you get N
 424   credits per jiffy).  We want to allow a rate as low as 1 per day
 425   (slowest userspace tool allows), which means
 426   CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie.
 427*/
 428#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24))
 429
 430/* Repeated shift and or gives us all 1s, final shift and add 1 gives
 431 * us the power of 2 below the theoretical max, so GCC simply does a
 432 * shift. */
 433#define _POW2_BELOW2(x) ((x)|((x)>>1))
 434#define _POW2_BELOW4(x) (_POW2_BELOW2(x)|_POW2_BELOW2((x)>>2))
 435#define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4))
 436#define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8))
 437#define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16))
 438#define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1)
 439
 440#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 441
 442/* Precision saver. */
 443static inline u_int32_t
 444user2credits(u_int32_t user)
 445{
 446        /* If multiplying would overflow... */
 447        if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
 448                /* Divide first. */
 449                return (user / XT_HASHLIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
 450
 451        return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE;
 452}
 453
 454static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now)
 455{
 456        dh->rateinfo.credit += (now - dh->rateinfo.prev) * CREDITS_PER_JIFFY;
 457        if (dh->rateinfo.credit > dh->rateinfo.credit_cap)
 458                dh->rateinfo.credit = dh->rateinfo.credit_cap;
 459        dh->rateinfo.prev = now;
 460}
 461
 462static inline __be32 maskl(__be32 a, unsigned int l)
 463{
 464        return l ? htonl(ntohl(a) & ~0 << (32 - l)) : 0;
 465}
 466
 467#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 468static void hashlimit_ipv6_mask(__be32 *i, unsigned int p)
 469{
 470        switch (p) {
 471        case 0 ... 31:
 472                i[0] = maskl(i[0], p);
 473                i[1] = i[2] = i[3] = 0;
 474                break;
 475        case 32 ... 63:
 476                i[1] = maskl(i[1], p - 32);
 477                i[2] = i[3] = 0;
 478                break;
 479        case 64 ... 95:
 480                i[2] = maskl(i[2], p - 64);
 481                i[3] = 0;
 482        case 96 ... 127:
 483                i[3] = maskl(i[3], p - 96);
 484                break;
 485        case 128:
 486                break;
 487        }
 488}
 489#endif
 490
 491static int
 492hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
 493                   struct dsthash_dst *dst,
 494                   const struct sk_buff *skb, unsigned int protoff)
 495{
 496        __be16 _ports[2], *ports;
 497        u8 nexthdr;
 498
 499        memset(dst, 0, sizeof(*dst));
 500
 501        switch (hinfo->family) {
 502        case NFPROTO_IPV4:
 503                if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
 504                        dst->ip.dst = maskl(ip_hdr(skb)->daddr,
 505                                      hinfo->cfg.dstmask);
 506                if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
 507                        dst->ip.src = maskl(ip_hdr(skb)->saddr,
 508                                      hinfo->cfg.srcmask);
 509
 510                if (!(hinfo->cfg.mode &
 511                      (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
 512                        return 0;
 513                nexthdr = ip_hdr(skb)->protocol;
 514                break;
 515#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 516        case NFPROTO_IPV6:
 517                if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) {
 518                        memcpy(&dst->ip6.dst, &ipv6_hdr(skb)->daddr,
 519                               sizeof(dst->ip6.dst));
 520                        hashlimit_ipv6_mask(dst->ip6.dst, hinfo->cfg.dstmask);
 521                }
 522                if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP) {
 523                        memcpy(&dst->ip6.src, &ipv6_hdr(skb)->saddr,
 524                               sizeof(dst->ip6.src));
 525                        hashlimit_ipv6_mask(dst->ip6.src, hinfo->cfg.srcmask);
 526                }
 527
 528                if (!(hinfo->cfg.mode &
 529                      (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
 530                        return 0;
 531                nexthdr = ipv6_hdr(skb)->nexthdr;
 532                protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
 533                if ((int)protoff < 0)
 534                        return -1;
 535                break;
 536#endif
 537        default:
 538                BUG();
 539                return 0;
 540        }
 541
 542        switch (nexthdr) {
 543        case IPPROTO_TCP:
 544        case IPPROTO_UDP:
 545        case IPPROTO_UDPLITE:
 546        case IPPROTO_SCTP:
 547        case IPPROTO_DCCP:
 548                ports = skb_header_pointer(skb, protoff, sizeof(_ports),
 549                                           &_ports);
 550                break;
 551        default:
 552                _ports[0] = _ports[1] = 0;
 553                ports = _ports;
 554                break;
 555        }
 556        if (!ports)
 557                return -1;
 558        if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SPT)
 559                dst->src_port = ports[0];
 560        if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DPT)
 561                dst->dst_port = ports[1];
 562        return 0;
 563}
 564
 565static bool
 566hashlimit_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 567{
 568        const struct xt_hashlimit_info *r = par->matchinfo;
 569        struct xt_hashlimit_htable *hinfo = r->hinfo;
 570        unsigned long now = jiffies;
 571        struct dsthash_ent *dh;
 572        struct dsthash_dst dst;
 573
 574        if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
 575                goto hotdrop;
 576
 577        spin_lock_bh(&hinfo->lock);
 578        dh = dsthash_find(hinfo, &dst);
 579        if (!dh) {
 580                dh = dsthash_alloc_init(hinfo, &dst);
 581                if (!dh) {
 582                        spin_unlock_bh(&hinfo->lock);
 583                        goto hotdrop;
 584                }
 585
 586                dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
 587                dh->rateinfo.prev = jiffies;
 588                dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
 589                                                   hinfo->cfg.burst);
 590                dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg *
 591                                                       hinfo->cfg.burst);
 592                dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
 593        } else {
 594                /* update expiration timeout */
 595                dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
 596                rateinfo_recalc(dh, now);
 597        }
 598
 599        if (dh->rateinfo.credit >= dh->rateinfo.cost) {
 600                /* We're underlimit. */
 601                dh->rateinfo.credit -= dh->rateinfo.cost;
 602                spin_unlock_bh(&hinfo->lock);
 603                return true;
 604        }
 605
 606        spin_unlock_bh(&hinfo->lock);
 607
 608        /* default case: we're overlimit, thus don't match */
 609        return false;
 610
 611hotdrop:
 612        *par->hotdrop = true;
 613        return false;
 614}
 615
 616static bool
 617hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 618{
 619        const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 620        struct xt_hashlimit_htable *hinfo = info->hinfo;
 621        unsigned long now = jiffies;
 622        struct dsthash_ent *dh;
 623        struct dsthash_dst dst;
 624
 625        if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
 626                goto hotdrop;
 627
 628        spin_lock_bh(&hinfo->lock);
 629        dh = dsthash_find(hinfo, &dst);
 630        if (dh == NULL) {
 631                dh = dsthash_alloc_init(hinfo, &dst);
 632                if (dh == NULL) {
 633                        spin_unlock_bh(&hinfo->lock);
 634                        goto hotdrop;
 635                }
 636
 637                dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
 638                dh->rateinfo.prev = jiffies;
 639                dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
 640                                      hinfo->cfg.burst);
 641                dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg *
 642                                          hinfo->cfg.burst);
 643                dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
 644        } else {
 645                /* update expiration timeout */
 646                dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
 647                rateinfo_recalc(dh, now);
 648        }
 649
 650        if (dh->rateinfo.credit >= dh->rateinfo.cost) {
 651                /* below the limit */
 652                dh->rateinfo.credit -= dh->rateinfo.cost;
 653                spin_unlock_bh(&hinfo->lock);
 654                return !(info->cfg.mode & XT_HASHLIMIT_INVERT);
 655        }
 656
 657        spin_unlock_bh(&hinfo->lock);
 658        /* default match is underlimit - so over the limit, we need to invert */
 659        return info->cfg.mode & XT_HASHLIMIT_INVERT;
 660
 661 hotdrop:
 662        *par->hotdrop = true;
 663        return false;
 664}
 665
 666static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
 667{
 668        struct xt_hashlimit_info *r = par->matchinfo;
 669
 670        /* Check for overflow. */
 671        if (r->cfg.burst == 0 ||
 672            user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) {
 673                printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n",
 674                       r->cfg.avg, r->cfg.burst);
 675                return false;
 676        }
 677        if (r->cfg.mode == 0 ||
 678            r->cfg.mode > (XT_HASHLIMIT_HASH_DPT |
 679                           XT_HASHLIMIT_HASH_DIP |
 680                           XT_HASHLIMIT_HASH_SIP |
 681                           XT_HASHLIMIT_HASH_SPT))
 682                return false;
 683        if (!r->cfg.gc_interval)
 684                return false;
 685        if (!r->cfg.expire)
 686                return false;
 687        if (r->name[sizeof(r->name) - 1] != '\0')
 688                return false;
 689
 690        /* This is the best we've got: We cannot release and re-grab lock,
 691         * since checkentry() is called before x_tables.c grabs xt_mutex.
 692         * We also cannot grab the hashtable spinlock, since htable_create will
 693         * call vmalloc, and that can sleep.  And we cannot just re-search
 694         * the list of htable's in htable_create(), since then we would
 695         * create duplicate proc files. -HW */
 696        mutex_lock(&hlimit_mutex);
 697        r->hinfo = htable_find_get(r->name, par->match->family);
 698        if (!r->hinfo && htable_create_v0(r, par->match->family) != 0) {
 699                mutex_unlock(&hlimit_mutex);
 700                return false;
 701        }
 702        mutex_unlock(&hlimit_mutex);
 703
 704        return true;
 705}
 706
 707static bool hashlimit_mt_check(const struct xt_mtchk_param *par)
 708{
 709        struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 710
 711        /* Check for overflow. */
 712        if (info->cfg.burst == 0 ||
 713            user2credits(info->cfg.avg * info->cfg.burst) <
 714            user2credits(info->cfg.avg)) {
 715                printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n",
 716                       info->cfg.avg, info->cfg.burst);
 717                return false;
 718        }
 719        if (info->cfg.gc_interval == 0 || info->cfg.expire == 0)
 720                return false;
 721        if (info->name[sizeof(info->name)-1] != '\0')
 722                return false;
 723        if (par->match->family == NFPROTO_IPV4) {
 724                if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32)
 725                        return false;
 726        } else {
 727                if (info->cfg.srcmask > 128 || info->cfg.dstmask > 128)
 728                        return false;
 729        }
 730
 731        /* This is the best we've got: We cannot release and re-grab lock,
 732         * since checkentry() is called before x_tables.c grabs xt_mutex.
 733         * We also cannot grab the hashtable spinlock, since htable_create will
 734         * call vmalloc, and that can sleep.  And we cannot just re-search
 735         * the list of htable's in htable_create(), since then we would
 736         * create duplicate proc files. -HW */
 737        mutex_lock(&hlimit_mutex);
 738        info->hinfo = htable_find_get(info->name, par->match->family);
 739        if (!info->hinfo && htable_create(info, par->match->family) != 0) {
 740                mutex_unlock(&hlimit_mutex);
 741                return false;
 742        }
 743        mutex_unlock(&hlimit_mutex);
 744        return true;
 745}
 746
 747static void
 748hashlimit_mt_destroy_v0(const struct xt_mtdtor_param *par)
 749{
 750        const struct xt_hashlimit_info *r = par->matchinfo;
 751
 752        htable_put(r->hinfo);
 753}
 754
 755static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
 756{
 757        const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 758
 759        htable_put(info->hinfo);
 760}
 761
 762#ifdef CONFIG_COMPAT
 763struct compat_xt_hashlimit_info {
 764        char name[IFNAMSIZ];
 765        struct hashlimit_cfg cfg;
 766        compat_uptr_t hinfo;
 767        compat_uptr_t master;
 768};
 769
 770static void hashlimit_mt_compat_from_user(void *dst, void *src)
 771{
 772        int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
 773
 774        memcpy(dst, src, off);
 775        memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off);
 776}
 777
 778static int hashlimit_mt_compat_to_user(void __user *dst, void *src)
 779{
 780        int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
 781
 782        return copy_to_user(dst, src, off) ? -EFAULT : 0;
 783}
 784#endif
 785
 786static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 787        {
 788                .name           = "hashlimit",
 789                .revision       = 0,
 790                .family         = NFPROTO_IPV4,
 791                .match          = hashlimit_mt_v0,
 792                .matchsize      = sizeof(struct xt_hashlimit_info),
 793#ifdef CONFIG_COMPAT
 794                .compatsize     = sizeof(struct compat_xt_hashlimit_info),
 795                .compat_from_user = hashlimit_mt_compat_from_user,
 796                .compat_to_user = hashlimit_mt_compat_to_user,
 797#endif
 798                .checkentry     = hashlimit_mt_check_v0,
 799                .destroy        = hashlimit_mt_destroy_v0,
 800                .me             = THIS_MODULE
 801        },
 802        {
 803                .name           = "hashlimit",
 804                .revision       = 1,
 805                .family         = NFPROTO_IPV4,
 806                .match          = hashlimit_mt,
 807                .matchsize      = sizeof(struct xt_hashlimit_mtinfo1),
 808                .checkentry     = hashlimit_mt_check,
 809                .destroy        = hashlimit_mt_destroy,
 810                .me             = THIS_MODULE,
 811        },
 812#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 813        {
 814                .name           = "hashlimit",
 815                .family         = NFPROTO_IPV6,
 816                .match          = hashlimit_mt_v0,
 817                .matchsize      = sizeof(struct xt_hashlimit_info),
 818#ifdef CONFIG_COMPAT
 819                .compatsize     = sizeof(struct compat_xt_hashlimit_info),
 820                .compat_from_user = hashlimit_mt_compat_from_user,
 821                .compat_to_user = hashlimit_mt_compat_to_user,
 822#endif
 823                .checkentry     = hashlimit_mt_check_v0,
 824                .destroy        = hashlimit_mt_destroy_v0,
 825                .me             = THIS_MODULE
 826        },
 827        {
 828                .name           = "hashlimit",
 829                .revision       = 1,
 830                .family         = NFPROTO_IPV6,
 831                .match          = hashlimit_mt,
 832                .matchsize      = sizeof(struct xt_hashlimit_mtinfo1),
 833                .checkentry     = hashlimit_mt_check,
 834                .destroy        = hashlimit_mt_destroy,
 835                .me             = THIS_MODULE,
 836        },
 837#endif
 838};
 839
 840/* PROC stuff */
 841static void *dl_seq_start(struct seq_file *s, loff_t *pos)
 842        __acquires(htable->lock)
 843{
 844        struct proc_dir_entry *pde = s->private;
 845        struct xt_hashlimit_htable *htable = pde->data;
 846        unsigned int *bucket;
 847
 848        spin_lock_bh(&htable->lock);
 849        if (*pos >= htable->cfg.size)
 850                return NULL;
 851
 852        bucket = kmalloc(sizeof(unsigned int), GFP_ATOMIC);
 853        if (!bucket)
 854                return ERR_PTR(-ENOMEM);
 855
 856        *bucket = *pos;
 857        return bucket;
 858}
 859
 860static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
 861{
 862        struct proc_dir_entry *pde = s->private;
 863        struct xt_hashlimit_htable *htable = pde->data;
 864        unsigned int *bucket = (unsigned int *)v;
 865
 866        *pos = ++(*bucket);
 867        if (*pos >= htable->cfg.size) {
 868                kfree(v);
 869                return NULL;
 870        }
 871        return bucket;
 872}
 873
 874static void dl_seq_stop(struct seq_file *s, void *v)
 875        __releases(htable->lock)
 876{
 877        struct proc_dir_entry *pde = s->private;
 878        struct xt_hashlimit_htable *htable = pde->data;
 879        unsigned int *bucket = (unsigned int *)v;
 880
 881        kfree(bucket);
 882        spin_unlock_bh(&htable->lock);
 883}
 884
 885static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 886                                   struct seq_file *s)
 887{
 888        /* recalculate to show accurate numbers */
 889        rateinfo_recalc(ent, jiffies);
 890
 891        switch (family) {
 892        case NFPROTO_IPV4:
 893                return seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n",
 894                                 (long)(ent->expires - jiffies)/HZ,
 895                                 &ent->dst.ip.src,
 896                                 ntohs(ent->dst.src_port),
 897                                 &ent->dst.ip.dst,
 898                                 ntohs(ent->dst.dst_port),
 899                                 ent->rateinfo.credit, ent->rateinfo.credit_cap,
 900                                 ent->rateinfo.cost);
 901#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 902        case NFPROTO_IPV6:
 903                return seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n",
 904                                 (long)(ent->expires - jiffies)/HZ,
 905                                 &ent->dst.ip6.src,
 906                                 ntohs(ent->dst.src_port),
 907                                 &ent->dst.ip6.dst,
 908                                 ntohs(ent->dst.dst_port),
 909                                 ent->rateinfo.credit, ent->rateinfo.credit_cap,
 910                                 ent->rateinfo.cost);
 911#endif
 912        default:
 913                BUG();
 914                return 0;
 915        }
 916}
 917
 918static int dl_seq_show(struct seq_file *s, void *v)
 919{
 920        struct proc_dir_entry *pde = s->private;
 921        struct xt_hashlimit_htable *htable = pde->data;
 922        unsigned int *bucket = (unsigned int *)v;
 923        struct dsthash_ent *ent;
 924        struct hlist_node *pos;
 925
 926        if (!hlist_empty(&htable->hash[*bucket])) {
 927                hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node)
 928                        if (dl_seq_real_show(ent, htable->family, s))
 929                                return -1;
 930        }
 931        return 0;
 932}
 933
 934static const struct seq_operations dl_seq_ops = {
 935        .start = dl_seq_start,
 936        .next  = dl_seq_next,
 937        .stop  = dl_seq_stop,
 938        .show  = dl_seq_show
 939};
 940
 941static int dl_proc_open(struct inode *inode, struct file *file)
 942{
 943        int ret = seq_open(file, &dl_seq_ops);
 944
 945        if (!ret) {
 946                struct seq_file *sf = file->private_data;
 947                sf->private = PDE(inode);
 948        }
 949        return ret;
 950}
 951
 952static const struct file_operations dl_file_ops = {
 953        .owner   = THIS_MODULE,
 954        .open    = dl_proc_open,
 955        .read    = seq_read,
 956        .llseek  = seq_lseek,
 957        .release = seq_release
 958};
 959
 960static int __init hashlimit_mt_init(void)
 961{
 962        int err;
 963
 964        err = xt_register_matches(hashlimit_mt_reg,
 965              ARRAY_SIZE(hashlimit_mt_reg));
 966        if (err < 0)
 967                goto err1;
 968
 969        err = -ENOMEM;
 970        hashlimit_cachep = kmem_cache_create("xt_hashlimit",
 971                                            sizeof(struct dsthash_ent), 0, 0,
 972                                            NULL);
 973        if (!hashlimit_cachep) {
 974                printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n");
 975                goto err2;
 976        }
 977        hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", init_net.proc_net);
 978        if (!hashlimit_procdir4) {
 979                printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
 980                                "entry\n");
 981                goto err3;
 982        }
 983        err = 0;
 984#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 985        hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net);
 986        if (!hashlimit_procdir6) {
 987                printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
 988                                "entry\n");
 989                err = -ENOMEM;
 990        }
 991#endif
 992        if (!err)
 993                return 0;
 994        remove_proc_entry("ipt_hashlimit", init_net.proc_net);
 995err3:
 996        kmem_cache_destroy(hashlimit_cachep);
 997err2:
 998        xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
 999err1:
1000        return err;
1001
1002}
1003
1004static void __exit hashlimit_mt_exit(void)
1005{
1006        remove_proc_entry("ipt_hashlimit", init_net.proc_net);
1007#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
1008        remove_proc_entry("ip6t_hashlimit", init_net.proc_net);
1009#endif
1010        kmem_cache_destroy(hashlimit_cachep);
1011        xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
1012}
1013
/* Module entry and exit points. */
module_init(hashlimit_mt_init);
module_exit(hashlimit_mt_exit);
1016