linux/net/core/flow.c
/* flow.c: Generic flow cache.
 *
 * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
 * Copyright (C) 2003 David S. Miller (davem@redhat.com)
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/completion.h>
#include <linux/percpu.h>
#include <linux/bitops.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/mutex.h>
#include <net/flow.h>
#include <linux/atomic.h>
#include <linux/security.h>

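/* A single cached flow: the lookup key (flowi) plus the resolved
 * flow_cache_object, tagged with the netns, family, direction and the
 * generation id that was current when the entry was resolved.  The
 * union lets an entry sit either on a per-cpu hash chain or, once
 * unlinked, on the global garbage-collection list.
 */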
struct flow_cache_entry {
        union {
                struct hlist_node       hlist;
                struct list_head        gc_list;
        } u;
        struct net                      *net;
        u16                             family;
        u8                              dir;
        u32                             genid;
        struct flowi                    key;
        struct flow_cache_object        *object;
};

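/* Per-cpu part of the cache: a private hash table and its entry count,
 * the jhash seed plus a flag requesting that the seed be regenerated,
 * and the tasklet used to flush this cpu's table.
 */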
struct flow_cache_percpu {
        struct hlist_head               *hash_table;
        int                             hash_count;
        u32                             hash_rnd;
        int                             hash_rnd_recalc;
        struct tasklet_struct           flush_tasklet;
};

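/* Bookkeeping for a full flush: the cache being flushed, how many cpus
 * still have to run their flush tasklet, and the completion the caller
 * sleeps on until that count reaches zero.
 */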
struct flow_flush_info {
        struct flow_cache               *cache;
        atomic_t                        cpuleft;
        struct completion               completion;
};

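/* The cache proper: hash table size (as a shift), the per-cpu tables,
 * the cpu-hotplug notifier, the shrink watermarks and the timer that
 * forces a periodic re-keying of the per-cpu hash seeds.
 */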
struct flow_cache {
        u32                             hash_shift;
        struct flow_cache_percpu __percpu *percpu;
        struct notifier_block           hotcpu_notifier;
        int                             low_watermark;
        int                             high_watermark;
        struct timer_list               rnd_timer;
};

atomic_t flow_cache_genid = ATOMIC_INIT(0);
EXPORT_SYMBOL(flow_cache_genid);
static struct flow_cache flow_cache_global;
static struct kmem_cache *flow_cachep __read_mostly;

static DEFINE_SPINLOCK(flow_cache_gc_lock);
static LIST_HEAD(flow_cache_gc_list);

#define flow_cache_hash_size(cache)     (1 << (cache)->hash_shift)
#define FLOW_HASH_RND_PERIOD            (10 * 60 * HZ)

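/* Timer callback: ask every cpu to pick a fresh hash seed on its next
 * lookup, then re-arm the timer for another FLOW_HASH_RND_PERIOD.
 */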
static void flow_cache_new_hashrnd(unsigned long arg)
{
        struct flow_cache *fc = (void *) arg;
        int i;

        for_each_possible_cpu(i)
                per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;

        fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
        add_timer(&fc->rnd_timer);
}

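/* An entry is stale once the global generation counter has moved on,
 * or once the resolver object it points to no longer passes its own
 * ->check() hook.
 */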
static int flow_entry_valid(struct flow_cache_entry *fle)
{
        if (atomic_read(&flow_cache_genid) != fle->genid)
                return 0;
        if (fle->object && !fle->object->ops->check(fle->object))
                return 0;
        return 1;
}

static void flow_entry_kill(struct flow_cache_entry *fle)
{
        if (fle->object)
                fle->object->ops->delete(fle->object);
        kmem_cache_free(flow_cachep, fle);
}

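/* Workqueue handler: splice the global gc list onto a private list
 * under the gc lock, then free the dead entries without holding any
 * lock.
 */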
static void flow_cache_gc_task(struct work_struct *work)
{
        struct list_head gc_list;
        struct flow_cache_entry *fce, *n;

        INIT_LIST_HEAD(&gc_list);
        spin_lock_bh(&flow_cache_gc_lock);
        list_splice_tail_init(&flow_cache_gc_list, &gc_list);
        spin_unlock_bh(&flow_cache_gc_lock);

        list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
                flow_entry_kill(fce);
}
static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task);

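/* Hand a batch of unlinked entries over to the gc work: adjust this
 * cpu's entry count, append the batch to the global gc list and kick
 * the workqueue.  A no-op when nothing was deleted.
 */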
static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
                                     int deleted, struct list_head *gc_list)
{
        if (deleted) {
                fcp->hash_count -= deleted;
                spin_lock_bh(&flow_cache_gc_lock);
                list_splice_tail(gc_list, &flow_cache_gc_list);
                spin_unlock_bh(&flow_cache_gc_lock);
                schedule_work(&flow_cache_gc_work);
        }
}

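/* Walk this cpu's hash table and keep at most shrink_to still-valid
 * entries per chain; everything else is unlinked and queued for the
 * garbage collector.  shrink_to == 0 therefore empties the table.
 */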
static void __flow_cache_shrink(struct flow_cache *fc,
                                struct flow_cache_percpu *fcp,
                                int shrink_to)
{
        struct flow_cache_entry *fle;
        struct hlist_node *entry, *tmp;
        LIST_HEAD(gc_list);
        int i, deleted = 0;

        for (i = 0; i < flow_cache_hash_size(fc); i++) {
                int saved = 0;

                hlist_for_each_entry_safe(fle, entry, tmp,
                                          &fcp->hash_table[i], u.hlist) {
                        if (saved < shrink_to &&
                            flow_entry_valid(fle)) {
                                saved++;
                        } else {
                                deleted++;
                                hlist_del(&fle->u.hlist);
                                list_add_tail(&fle->u.gc_list, &gc_list);
                        }
                }
        }

        flow_cache_queue_garbage(fcp, deleted, &gc_list);
}

static void flow_cache_shrink(struct flow_cache *fc,
                              struct flow_cache_percpu *fcp)
{
        int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);

        __flow_cache_shrink(fc, fcp, shrink_to);
}

static void flow_new_hash_rnd(struct flow_cache *fc,
                              struct flow_cache_percpu *fcp)
{
        get_random_bytes(&fcp->hash_rnd, sizeof(u32));
        fcp->hash_rnd_recalc = 0;
        __flow_cache_shrink(fc, fcp, 0);
}

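/* Hash the key with this cpu's seed and mask the result down to a
 * bucket index.  keysize is in units of flow_compare_t, so it is
 * scaled to 32-bit words for jhash2().
 */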
static u32 flow_hash_code(struct flow_cache *fc,
                          struct flow_cache_percpu *fcp,
                          const struct flowi *key,
                          size_t keysize)
{
        const u32 *k = (const u32 *) key;
        const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);

        return jhash2(k, length, fcp->hash_rnd)
                & (flow_cache_hash_size(fc) - 1);
}

/* I hear what you're saying, use memcmp.  But memcmp cannot make
 * important assumptions that we can here, such as alignment.
 */
static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
                            size_t keysize)
{
        const flow_compare_t *k1, *k1_lim, *k2;

        k1 = (const flow_compare_t *) key1;
        k1_lim = k1 + keysize;

        k2 = (const flow_compare_t *) key2;

        do {
                if (*k1++ != *k2++)
                        return 1;
        } while (k1 < k1_lim);

        return 0;
}

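/* Main lookup: find (net, key, family, dir) in this cpu's hash table.
 * A fresh hit returns the cached object via its ->get() hook; a miss
 * or a stale hit falls back to the resolver (BHs stay disabled for the
 * whole lookup), and a successful result is cached under the current
 * genid when an entry is available.  Crossing the high watermark
 * triggers a shrink before a new entry is allocated.
 */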
struct flow_cache_object *
flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
                  flow_resolve_t resolver, void *ctx)
{
        struct flow_cache *fc = &flow_cache_global;
        struct flow_cache_percpu *fcp;
        struct flow_cache_entry *fle, *tfle;
        struct hlist_node *entry;
        struct flow_cache_object *flo;
        size_t keysize;
        unsigned int hash;

        local_bh_disable();
        fcp = this_cpu_ptr(fc->percpu);

        fle = NULL;
        flo = NULL;

        keysize = flow_key_size(family);
        if (!keysize)
                goto nocache;

        /* Packet really early in init?  Making flow_cache_init a
         * pre-smp initcall would solve this.  --RR */
        if (!fcp->hash_table)
                goto nocache;

        if (fcp->hash_rnd_recalc)
                flow_new_hash_rnd(fc, fcp);

        hash = flow_hash_code(fc, fcp, key, keysize);
        hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) {
                if (tfle->net == net &&
                    tfle->family == family &&
                    tfle->dir == dir &&
                    flow_key_compare(key, &tfle->key, keysize) == 0) {
                        fle = tfle;
                        break;
                }
        }

        if (unlikely(!fle)) {
                if (fcp->hash_count > fc->high_watermark)
                        flow_cache_shrink(fc, fcp);

                fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
                if (fle) {
                        fle->net = net;
                        fle->family = family;
                        fle->dir = dir;
                        memcpy(&fle->key, key, keysize * sizeof(flow_compare_t));
                        fle->object = NULL;
                        hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
                        fcp->hash_count++;
                }
        } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
                flo = fle->object;
                if (!flo)
                        goto ret_object;
                flo = flo->ops->get(flo);
                if (flo)
                        goto ret_object;
        } else if (fle->object) {
                flo = fle->object;
                flo->ops->delete(flo);
                fle->object = NULL;
        }

nocache:
        flo = NULL;
        if (fle) {
                flo = fle->object;
                fle->object = NULL;
        }
        flo = resolver(net, key, family, dir, flo, ctx);
        if (fle) {
                fle->genid = atomic_read(&flow_cache_genid);
                if (!IS_ERR(flo))
                        fle->object = flo;
                else
                        fle->genid--;
        } else {
                if (flo && !IS_ERR(flo))
                        flo->ops->delete(flo);
        }
ret_object:
        local_bh_enable();
        return flo;
}
EXPORT_SYMBOL(flow_cache_lookup);

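/* Per-cpu flush tasklet: unlink every entry in this cpu's table that
 * is no longer valid, queue the corpses for the gc work, and signal
 * the waiter once the last cpu has finished.
 */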
static void flow_cache_flush_tasklet(unsigned long data)
{
        struct flow_flush_info *info = (void *)data;
        struct flow_cache *fc = info->cache;
        struct flow_cache_percpu *fcp;
        struct flow_cache_entry *fle;
        struct hlist_node *entry, *tmp;
        LIST_HEAD(gc_list);
        int i, deleted = 0;

        fcp = this_cpu_ptr(fc->percpu);
        for (i = 0; i < flow_cache_hash_size(fc); i++) {
                hlist_for_each_entry_safe(fle, entry, tmp,
                                          &fcp->hash_table[i], u.hlist) {
                        if (flow_entry_valid(fle))
                                continue;

                        deleted++;
                        hlist_del(&fle->u.hlist);
                        list_add_tail(&fle->u.gc_list, &gc_list);
                }
        }

        flow_cache_queue_garbage(fcp, deleted, &gc_list);

        if (atomic_dec_and_test(&info->cpuleft))
                complete(&info->completion);
}

static void flow_cache_flush_per_cpu(void *data)
{
        struct flow_flush_info *info = data;
        struct tasklet_struct *tasklet;

        tasklet = this_cpu_ptr(&info->cache->percpu->flush_tasklet);
        tasklet->data = (unsigned long)info;
        tasklet_schedule(tasklet);
}

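/* Synchronously flush stale entries on every online cpu: schedule the
 * flush tasklet remotely via smp_call_function() and run it locally by
 * direct call, then sleep until all cpus have reported in.  Serialized
 * by a local mutex and protected against cpu hotplug.  May sleep, so
 * atomic callers should use flow_cache_flush_deferred() instead.
 */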
void flow_cache_flush(void)
{
        struct flow_flush_info info;
        static DEFINE_MUTEX(flow_flush_sem);

        /* Don't want cpus going down or up during this. */
        get_online_cpus();
        mutex_lock(&flow_flush_sem);
        info.cache = &flow_cache_global;
        atomic_set(&info.cpuleft, num_online_cpus());
        init_completion(&info.completion);

        local_bh_disable();
        smp_call_function(flow_cache_flush_per_cpu, &info, 0);
        flow_cache_flush_tasklet((unsigned long)&info);
        local_bh_enable();

        wait_for_completion(&info.completion);
        mutex_unlock(&flow_flush_sem);
        put_online_cpus();
}

static void flow_cache_flush_task(struct work_struct *work)
{
        flow_cache_flush();
}

static DECLARE_WORK(flow_cache_flush_work, flow_cache_flush_task);

void flow_cache_flush_deferred(void)
{
        schedule_work(&flow_cache_flush_work);
}

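/* Allocate and initialize one cpu's hash table (if it does not exist
 * yet) and its flush tasklet.  Called at init time for each online cpu
 * and from the hotplug notifier when a cpu comes up.
 */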
static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
{
        struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
        size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);

        if (!fcp->hash_table) {
                fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
                if (!fcp->hash_table) {
                        pr_err("NET: failed to allocate flow cache sz %zu\n", sz);
                        return -ENOMEM;
                }
                fcp->hash_rnd_recalc = 1;
                fcp->hash_count = 0;
                tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
        }
        return 0;
}

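/* CPU hotplug notifier: set up the per-cpu table before a cpu comes
 * online, and drop all of its cached entries once it is dead.
 */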
static int __cpuinit flow_cache_cpu(struct notifier_block *nfb,
                          unsigned long action,
                          void *hcpu)
{
        struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
        int res, cpu = (unsigned long) hcpu;
        struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);

        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
                res = flow_cache_cpu_prepare(fc, cpu);
                if (res)
                        return notifier_from_errno(res);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                __flow_cache_shrink(fc, fcp, 0);
                break;
        }
        return NOTIFY_OK;
}

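/* Set up one flow cache: a 1024-bucket (hash_shift = 10) table per
 * cpu, watermarks derived from the table size, the hotplug notifier
 * and the periodic hash re-keying timer.  On allocation failure all
 * per-cpu tables are freed again and -ENOMEM is returned.
 */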
static int __init flow_cache_init(struct flow_cache *fc)
{
        int i;

        fc->hash_shift = 10;
        fc->low_watermark = 2 * flow_cache_hash_size(fc);
        fc->high_watermark = 4 * flow_cache_hash_size(fc);

        fc->percpu = alloc_percpu(struct flow_cache_percpu);
        if (!fc->percpu)
                return -ENOMEM;

        for_each_online_cpu(i) {
                if (flow_cache_cpu_prepare(fc, i))
                        goto err;
        }
        fc->hotcpu_notifier = (struct notifier_block){
                .notifier_call = flow_cache_cpu,
        };
        register_hotcpu_notifier(&fc->hotcpu_notifier);

        setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
                    (unsigned long) fc);
        fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
        add_timer(&fc->rnd_timer);

        return 0;

err:
        for_each_possible_cpu(i) {
                struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
                kfree(fcp->hash_table);
                fcp->hash_table = NULL;
        }

        free_percpu(fc->percpu);
        fc->percpu = NULL;

        return -ENOMEM;
}

static int __init flow_cache_init_global(void)
{
        flow_cachep = kmem_cache_create("flow_cache",
                                        sizeof(struct flow_cache_entry),
                                        0, SLAB_PANIC, NULL);

        return flow_cache_init(&flow_cache_global);
}

module_init(flow_cache_init_global);