linux/net/netfilter/nf_conntrack_core.c
   1/* Connection state tracking for netfilter.  This is separated from,
   2   but required by, the NAT layer; it can also be used by an iptables
   3   extension. */
   4
   5/* (C) 1999-2001 Paul `Rusty' Russell
   6 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
   7 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
   8 * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
   9 *
  10 * This program is free software; you can redistribute it and/or modify
  11 * it under the terms of the GNU General Public License version 2 as
  12 * published by the Free Software Foundation.
  13 */
  14
  15#include <linux/types.h>
  16#include <linux/netfilter.h>
  17#include <linux/module.h>
  18#include <linux/sched.h>
  19#include <linux/skbuff.h>
  20#include <linux/proc_fs.h>
  21#include <linux/vmalloc.h>
  22#include <linux/stddef.h>
  23#include <linux/slab.h>
  24#include <linux/random.h>
  25#include <linux/jhash.h>
  26#include <linux/err.h>
  27#include <linux/percpu.h>
  28#include <linux/moduleparam.h>
  29#include <linux/notifier.h>
  30#include <linux/kernel.h>
  31#include <linux/netdevice.h>
  32#include <linux/socket.h>
  33#include <linux/mm.h>
  34#include <linux/nsproxy.h>
  35#include <linux/rculist_nulls.h>
  36
  37#include <net/netfilter/nf_conntrack.h>
  38#include <net/netfilter/nf_conntrack_l3proto.h>
  39#include <net/netfilter/nf_conntrack_l4proto.h>
  40#include <net/netfilter/nf_conntrack_expect.h>
  41#include <net/netfilter/nf_conntrack_helper.h>
  42#include <net/netfilter/nf_conntrack_seqadj.h>
  43#include <net/netfilter/nf_conntrack_core.h>
  44#include <net/netfilter/nf_conntrack_extend.h>
  45#include <net/netfilter/nf_conntrack_acct.h>
  46#include <net/netfilter/nf_conntrack_ecache.h>
  47#include <net/netfilter/nf_conntrack_zones.h>
  48#include <net/netfilter/nf_conntrack_timestamp.h>
  49#include <net/netfilter/nf_conntrack_timeout.h>
  50#include <net/netfilter/nf_conntrack_labels.h>
  51#include <net/netfilter/nf_conntrack_synproxy.h>
  52#include <net/netfilter/nf_nat.h>
  53#include <net/netfilter/nf_nat_core.h>
  54#include <net/netfilter/nf_nat_helper.h>
  55
  56#define NF_CONNTRACK_VERSION    "0.5.0"
  57
  58int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
  59                                      enum nf_nat_manip_type manip,
  60                                      const struct nlattr *attr) __read_mostly;
  61EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
  62
  63__cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
  64EXPORT_SYMBOL_GPL(nf_conntrack_locks);
  65
  66__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
  67EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
  68
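/* Hash chain locking: buckets are protected by the nf_conntrack_locks
 * array, a bucket's lock being nf_conntrack_locks[hash % CONNTRACK_LOCKS].
 * When the original- and reply-direction buckets must both be held, the
 * lock with the lower index is always taken first to avoid ABBA
 * deadlocks, and the caller rechecks net->ct.generation in case the
 * hash table was resized while waiting for the locks.
 */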
  69static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
  70{
  71        h1 %= CONNTRACK_LOCKS;
  72        h2 %= CONNTRACK_LOCKS;
  73        spin_unlock(&nf_conntrack_locks[h1]);
  74        if (h1 != h2)
  75                spin_unlock(&nf_conntrack_locks[h2]);
  76}
  77
   78/* Return true if we need to recompute the hashes because the hash table was resized. */
  79static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
  80                                     unsigned int h2, unsigned int sequence)
  81{
  82        h1 %= CONNTRACK_LOCKS;
  83        h2 %= CONNTRACK_LOCKS;
  84        if (h1 <= h2) {
  85                spin_lock(&nf_conntrack_locks[h1]);
  86                if (h1 != h2)
  87                        spin_lock_nested(&nf_conntrack_locks[h2],
  88                                         SINGLE_DEPTH_NESTING);
  89        } else {
  90                spin_lock(&nf_conntrack_locks[h2]);
  91                spin_lock_nested(&nf_conntrack_locks[h1],
  92                                 SINGLE_DEPTH_NESTING);
  93        }
  94        if (read_seqcount_retry(&net->ct.generation, sequence)) {
  95                nf_conntrack_double_unlock(h1, h2);
  96                return true;
  97        }
  98        return false;
  99}
 100
 101static void nf_conntrack_all_lock(void)
 102{
 103        int i;
 104
 105        for (i = 0; i < CONNTRACK_LOCKS; i++)
 106                spin_lock_nested(&nf_conntrack_locks[i], i);
 107}
 108
 109static void nf_conntrack_all_unlock(void)
 110{
 111        int i;
 112
 113        for (i = 0; i < CONNTRACK_LOCKS; i++)
 114                spin_unlock(&nf_conntrack_locks[i]);
 115}
 116
 117unsigned int nf_conntrack_htable_size __read_mostly;
 118EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 119
 120unsigned int nf_conntrack_max __read_mostly;
 121EXPORT_SYMBOL_GPL(nf_conntrack_max);
 122
 123DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
 124EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
 125
 126unsigned int nf_conntrack_hash_rnd __read_mostly;
 127EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);
 128
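/* Tuple hashing: jhash2 covers the source part of the tuple plus the
 * destination address (a whole number of 32-bit words); the remaining
 * destination port and protocol number are folded into the seed together
 * with the random nf_conntrack_hash_rnd, and the direction byte is
 * deliberately excluded.  reciprocal_scale() then maps the 32-bit hash
 * onto a bucket index without an expensive modulo.
 */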
 129static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple)
 130{
 131        unsigned int n;
 132
 133        /* The direction must be ignored, so we hash everything up to the
 134         * destination ports (which is a multiple of 4) and treat the last
 135         * three bytes manually.
 136         */
 137        n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
 138        return jhash2((u32 *)tuple, n, nf_conntrack_hash_rnd ^
 139                      (((__force __u16)tuple->dst.u.all << 16) |
 140                      tuple->dst.protonum));
 141}
 142
 143static u32 __hash_bucket(u32 hash, unsigned int size)
 144{
 145        return reciprocal_scale(hash, size);
 146}
 147
 148static u32 hash_bucket(u32 hash, const struct net *net)
 149{
 150        return __hash_bucket(hash, net->ct.htable_size);
 151}
 152
 153static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
 154                                  unsigned int size)
 155{
 156        return __hash_bucket(hash_conntrack_raw(tuple), size);
 157}
 158
 159static inline u_int32_t hash_conntrack(const struct net *net,
 160                                       const struct nf_conntrack_tuple *tuple)
 161{
 162        return __hash_conntrack(tuple, net->ct.htable_size);
 163}
 164
 165bool
 166nf_ct_get_tuple(const struct sk_buff *skb,
 167                unsigned int nhoff,
 168                unsigned int dataoff,
 169                u_int16_t l3num,
 170                u_int8_t protonum,
 171                struct nf_conntrack_tuple *tuple,
 172                const struct nf_conntrack_l3proto *l3proto,
 173                const struct nf_conntrack_l4proto *l4proto)
 174{
 175        memset(tuple, 0, sizeof(*tuple));
 176
 177        tuple->src.l3num = l3num;
 178        if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
 179                return false;
 180
 181        tuple->dst.protonum = protonum;
 182        tuple->dst.dir = IP_CT_DIR_ORIGINAL;
 183
 184        return l4proto->pkt_to_tuple(skb, dataoff, tuple);
 185}
 186EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
 187
 188bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
 189                       u_int16_t l3num, struct nf_conntrack_tuple *tuple)
 190{
 191        struct nf_conntrack_l3proto *l3proto;
 192        struct nf_conntrack_l4proto *l4proto;
 193        unsigned int protoff;
 194        u_int8_t protonum;
 195        int ret;
 196
 197        rcu_read_lock();
 198
 199        l3proto = __nf_ct_l3proto_find(l3num);
 200        ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
 201        if (ret != NF_ACCEPT) {
 202                rcu_read_unlock();
 203                return false;
 204        }
 205
 206        l4proto = __nf_ct_l4proto_find(l3num, protonum);
 207
 208        ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, tuple,
 209                              l3proto, l4proto);
 210
 211        rcu_read_unlock();
 212        return ret;
 213}
 214EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
 215
 216bool
 217nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 218                   const struct nf_conntrack_tuple *orig,
 219                   const struct nf_conntrack_l3proto *l3proto,
 220                   const struct nf_conntrack_l4proto *l4proto)
 221{
 222        memset(inverse, 0, sizeof(*inverse));
 223
 224        inverse->src.l3num = orig->src.l3num;
 225        if (l3proto->invert_tuple(inverse, orig) == 0)
 226                return false;
 227
 228        inverse->dst.dir = !orig->dst.dir;
 229
 230        inverse->dst.protonum = orig->dst.protonum;
 231        return l4proto->invert_tuple(inverse, orig);
 232}
 233EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
 234
 235static void
 236clean_from_lists(struct nf_conn *ct)
 237{
 238        pr_debug("clean_from_lists(%p)\n", ct);
 239        hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
 240        hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);
 241
 242        /* Destroy all pending expectations */
 243        nf_ct_remove_expectations(ct);
 244}
 245
 246/* must be called with local_bh_disable */
 247static void nf_ct_add_to_dying_list(struct nf_conn *ct)
 248{
 249        struct ct_pcpu *pcpu;
 250
 251        /* add this conntrack to the (per cpu) dying list */
 252        ct->cpu = smp_processor_id();
 253        pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
 254
 255        spin_lock(&pcpu->lock);
 256        hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
 257                             &pcpu->dying);
 258        spin_unlock(&pcpu->lock);
 259}
 260
 261/* must be called with local_bh_disable */
 262static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct)
 263{
 264        struct ct_pcpu *pcpu;
 265
 266        /* add this conntrack to the (per cpu) unconfirmed list */
 267        ct->cpu = smp_processor_id();
 268        pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
 269
 270        spin_lock(&pcpu->lock);
 271        hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
 272                             &pcpu->unconfirmed);
 273        spin_unlock(&pcpu->lock);
 274}
 275
 276/* must be called with local_bh_disable */
 277static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
 278{
 279        struct ct_pcpu *pcpu;
 280
  281        /* We overload the first tuple to link into the unconfirmed or dying list. */
 282        pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
 283
 284        spin_lock(&pcpu->lock);
 285        BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
 286        hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
 287        spin_unlock(&pcpu->lock);
 288}
 289
 290/* Released via destroy_conntrack() */
 291struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
 292                                 const struct nf_conntrack_zone *zone,
 293                                 gfp_t flags)
 294{
 295        struct nf_conn *tmpl;
 296
 297        tmpl = kzalloc(sizeof(*tmpl), flags);
 298        if (tmpl == NULL)
 299                return NULL;
 300
 301        tmpl->status = IPS_TEMPLATE;
 302        write_pnet(&tmpl->ct_net, net);
 303
 304        if (nf_ct_zone_add(tmpl, flags, zone) < 0)
 305                goto out_free;
 306
 307        atomic_set(&tmpl->ct_general.use, 0);
 308
 309        return tmpl;
 310out_free:
 311        kfree(tmpl);
 312        return NULL;
 313}
 314EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
 315
 316void nf_ct_tmpl_free(struct nf_conn *tmpl)
 317{
 318        nf_ct_ext_destroy(tmpl);
 319        nf_ct_ext_free(tmpl);
 320        kfree(tmpl);
 321}
 322EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);
 323
 324static void
 325destroy_conntrack(struct nf_conntrack *nfct)
 326{
 327        struct nf_conn *ct = (struct nf_conn *)nfct;
 328        struct net *net = nf_ct_net(ct);
 329        struct nf_conntrack_l4proto *l4proto;
 330
 331        pr_debug("destroy_conntrack(%p)\n", ct);
 332        NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
 333        NF_CT_ASSERT(!timer_pending(&ct->timeout));
 334
 335        if (unlikely(nf_ct_is_template(ct))) {
 336                nf_ct_tmpl_free(ct);
 337                return;
 338        }
 339        rcu_read_lock();
 340        l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
 341        if (l4proto && l4proto->destroy)
 342                l4proto->destroy(ct);
 343
 344        rcu_read_unlock();
 345
 346        local_bh_disable();
 347        /* Expectations will have been removed in clean_from_lists,
 348         * except TFTP can create an expectation on the first packet,
 349         * before connection is in the list, so we need to clean here,
 350         * too.
 351         */
 352        nf_ct_remove_expectations(ct);
 353
 354        nf_ct_del_from_dying_or_unconfirmed_list(ct);
 355
 356        NF_CT_STAT_INC(net, delete);
 357        local_bh_enable();
 358
 359        if (ct->master)
 360                nf_ct_put(ct->master);
 361
 362        pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
 363        nf_conntrack_free(ct);
 364}
 365
 366static void nf_ct_delete_from_lists(struct nf_conn *ct)
 367{
 368        struct net *net = nf_ct_net(ct);
 369        unsigned int hash, reply_hash;
 370        unsigned int sequence;
 371
 372        nf_ct_helper_destroy(ct);
 373
 374        local_bh_disable();
 375        do {
 376                sequence = read_seqcount_begin(&net->ct.generation);
 377                hash = hash_conntrack(net,
 378                                      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 379                reply_hash = hash_conntrack(net,
 380                                           &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 381        } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 382
 383        clean_from_lists(ct);
 384        nf_conntrack_double_unlock(hash, reply_hash);
 385
 386        nf_ct_add_to_dying_list(ct);
 387
 388        NF_CT_STAT_INC(net, delete_list);
 389        local_bh_enable();
 390}
 391
 392bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
 393{
 394        struct nf_conn_tstamp *tstamp;
 395
 396        tstamp = nf_conn_tstamp_find(ct);
 397        if (tstamp && tstamp->stop == 0)
 398                tstamp->stop = ktime_get_real_ns();
 399
 400        if (nf_ct_is_dying(ct))
 401                goto delete;
 402
 403        if (nf_conntrack_event_report(IPCT_DESTROY, ct,
 404                                    portid, report) < 0) {
 405                /* destroy event was not delivered */
 406                nf_ct_delete_from_lists(ct);
 407                nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
 408                return false;
 409        }
 410
 411        nf_conntrack_ecache_work(nf_ct_net(ct));
 412        set_bit(IPS_DYING_BIT, &ct->status);
 413 delete:
 414        nf_ct_delete_from_lists(ct);
 415        nf_ct_put(ct);
 416        return true;
 417}
 418EXPORT_SYMBOL_GPL(nf_ct_delete);
 419
 420static void death_by_timeout(unsigned long ul_conntrack)
 421{
 422        nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
 423}
 424
 425static inline bool
 426nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
 427                const struct nf_conntrack_tuple *tuple,
 428                const struct nf_conntrack_zone *zone)
 429{
 430        struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
 431
  432        /* A conntrack can be recreated with an equal tuple,
  433         * so we also need to check that the conntrack is confirmed.
  434         */
 435        return nf_ct_tuple_equal(tuple, &h->tuple) &&
 436               nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) &&
 437               nf_ct_is_confirmed(ct);
 438}
 439
  440/*
  441 * Warning:
  442 * - The caller must take a reference on the returned object
  443 *   and recheck nf_ct_tuple_equal(tuple, &h->tuple).
  444 */
 445static struct nf_conntrack_tuple_hash *
 446____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
 447                      const struct nf_conntrack_tuple *tuple, u32 hash)
 448{
 449        struct nf_conntrack_tuple_hash *h;
 450        struct hlist_nulls_node *n;
 451        unsigned int bucket = hash_bucket(hash, net);
 452
 453        /* Disable BHs the entire time since we normally need to disable them
 454         * at least once for the stats anyway.
 455         */
 456        local_bh_disable();
 457begin:
 458        hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
 459                if (nf_ct_key_equal(h, tuple, zone)) {
 460                        NF_CT_STAT_INC(net, found);
 461                        local_bh_enable();
 462                        return h;
 463                }
 464                NF_CT_STAT_INC(net, searched);
 465        }
  466        /*
  467         * If the nulls value we got at the end of this lookup is
  468         * not the expected one, we must restart the lookup.
  469         * We probably raced with an item being moved to another chain.
  470         */
 471        if (get_nulls_value(n) != bucket) {
 472                NF_CT_STAT_INC(net, search_restart);
 473                goto begin;
 474        }
 475        local_bh_enable();
 476
 477        return NULL;
 478}
 479
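/* A note on the lockless lookup above: with SLAB_DESTROY_BY_RCU the
 * entry returned by ____nf_conntrack_find() may be freed and recycled
 * for a different connection at any time.  The caller therefore takes a
 * reference with atomic_inc_not_zero() and re-checks the tuple and zone;
 * if the re-check fails, the slot was reused and the lookup restarts.
 */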
 480/* Find a connection corresponding to a tuple. */
 481static struct nf_conntrack_tuple_hash *
 482__nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
 483                        const struct nf_conntrack_tuple *tuple, u32 hash)
 484{
 485        struct nf_conntrack_tuple_hash *h;
 486        struct nf_conn *ct;
 487
 488        rcu_read_lock();
 489begin:
 490        h = ____nf_conntrack_find(net, zone, tuple, hash);
 491        if (h) {
 492                ct = nf_ct_tuplehash_to_ctrack(h);
 493                if (unlikely(nf_ct_is_dying(ct) ||
 494                             !atomic_inc_not_zero(&ct->ct_general.use)))
 495                        h = NULL;
 496                else {
 497                        if (unlikely(!nf_ct_key_equal(h, tuple, zone))) {
 498                                nf_ct_put(ct);
 499                                goto begin;
 500                        }
 501                }
 502        }
 503        rcu_read_unlock();
 504
 505        return h;
 506}
 507
 508struct nf_conntrack_tuple_hash *
 509nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
 510                      const struct nf_conntrack_tuple *tuple)
 511{
 512        return __nf_conntrack_find_get(net, zone, tuple,
 513                                       hash_conntrack_raw(tuple));
 514}
 515EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
 516
 517static void __nf_conntrack_hash_insert(struct nf_conn *ct,
 518                                       unsigned int hash,
 519                                       unsigned int reply_hash)
 520{
 521        struct net *net = nf_ct_net(ct);
 522
 523        hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
 524                           &net->ct.hash[hash]);
 525        hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
 526                           &net->ct.hash[reply_hash]);
 527}
 528
 529int
 530nf_conntrack_hash_check_insert(struct nf_conn *ct)
 531{
 532        const struct nf_conntrack_zone *zone;
 533        struct net *net = nf_ct_net(ct);
 534        unsigned int hash, reply_hash;
 535        struct nf_conntrack_tuple_hash *h;
 536        struct hlist_nulls_node *n;
 537        unsigned int sequence;
 538
 539        zone = nf_ct_zone(ct);
 540
 541        local_bh_disable();
 542        do {
 543                sequence = read_seqcount_begin(&net->ct.generation);
 544                hash = hash_conntrack(net,
 545                                      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 546                reply_hash = hash_conntrack(net,
 547                                           &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 548        } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 549
 550        /* See if there's one in the list already, including reverse */
 551        hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
 552                if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
 553                                      &h->tuple) &&
 554                    nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
 555                                     NF_CT_DIRECTION(h)))
 556                        goto out;
 557        hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
 558                if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
 559                                      &h->tuple) &&
 560                    nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
 561                                     NF_CT_DIRECTION(h)))
 562                        goto out;
 563
 564        add_timer(&ct->timeout);
 565        smp_wmb();
 566        /* The caller holds a reference to this object */
 567        atomic_set(&ct->ct_general.use, 2);
 568        __nf_conntrack_hash_insert(ct, hash, reply_hash);
 569        nf_conntrack_double_unlock(hash, reply_hash);
 570        NF_CT_STAT_INC(net, insert);
 571        local_bh_enable();
 572        return 0;
 573
 574out:
 575        nf_conntrack_double_unlock(hash, reply_hash);
 576        NF_CT_STAT_INC(net, insert_failed);
 577        local_bh_enable();
 578        return -EEXIST;
 579}
 580EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
 581
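/* Confirmation moves a conntrack from the per-cpu unconfirmed list into
 * the hash table once the first packet has made it through the hooks.
 * The bucket for the original direction was precomputed at allocation
 * time and stashed in the reply tuplehash's pprev field; both bucket
 * locks are taken, a racing insert of the same tuple (e.g. by NAT) is
 * treated as a lost race, and the timer plus IPS_CONFIRMED are set up
 * before the RCU hash insertion so that lockless readers only ever see
 * a fully initialised entry.
 */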
 582/* Confirm a connection given skb; places it in hash table */
 583int
 584__nf_conntrack_confirm(struct sk_buff *skb)
 585{
 586        const struct nf_conntrack_zone *zone;
 587        unsigned int hash, reply_hash;
 588        struct nf_conntrack_tuple_hash *h;
 589        struct nf_conn *ct;
 590        struct nf_conn_help *help;
 591        struct nf_conn_tstamp *tstamp;
 592        struct hlist_nulls_node *n;
 593        enum ip_conntrack_info ctinfo;
 594        struct net *net;
 595        unsigned int sequence;
 596
 597        ct = nf_ct_get(skb, &ctinfo);
 598        net = nf_ct_net(ct);
 599
  600        /* ipt_REJECT uses nf_conntrack_attach to attach related
  601           ICMP/TCP RST packets in the other direction.  The actual packet
  602           which created the connection will be IP_CT_NEW, or IP_CT_RELATED
  603           for an expected connection. */
 604        if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
 605                return NF_ACCEPT;
 606
 607        zone = nf_ct_zone(ct);
 608        local_bh_disable();
 609
 610        do {
 611                sequence = read_seqcount_begin(&net->ct.generation);
 612                /* reuse the hash saved before */
 613                hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
 614                hash = hash_bucket(hash, net);
 615                reply_hash = hash_conntrack(net,
 616                                           &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 617
 618        } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 619
 620        /* We're not in hash table, and we refuse to set up related
 621         * connections for unconfirmed conns.  But packet copies and
 622         * REJECT will give spurious warnings here.
 623         */
 624        /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
 625
 626        /* No external references means no one else could have
 627         * confirmed us.
 628         */
 629        NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
 630        pr_debug("Confirming conntrack %p\n", ct);
 631        /* We have to check the DYING flag after unlink to prevent
 632         * a race against nf_ct_get_next_corpse() possibly called from
 633         * user context, else we insert an already 'dead' hash, blocking
 634         * further use of that particular connection -JM.
 635         */
 636        nf_ct_del_from_dying_or_unconfirmed_list(ct);
 637
 638        if (unlikely(nf_ct_is_dying(ct)))
 639                goto out;
 640
 641        /* See if there's one in the list already, including reverse:
 642           NAT could have grabbed it without realizing, since we're
  643           not in the hash.  If there is, we lost the race. */
 644        hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
 645                if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
 646                                      &h->tuple) &&
 647                    nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
 648                                     NF_CT_DIRECTION(h)))
 649                        goto out;
 650        hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
 651                if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
 652                                      &h->tuple) &&
 653                    nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
 654                                     NF_CT_DIRECTION(h)))
 655                        goto out;
 656
  657        /* The timer is relative to the confirmation time, not the original
  658           setting time; otherwise we'd get timer wrap in
  659           weird delay cases. */
 660        ct->timeout.expires += jiffies;
 661        add_timer(&ct->timeout);
 662        atomic_inc(&ct->ct_general.use);
 663        ct->status |= IPS_CONFIRMED;
 664
 665        /* set conntrack timestamp, if enabled. */
 666        tstamp = nf_conn_tstamp_find(ct);
 667        if (tstamp) {
 668                if (skb->tstamp.tv64 == 0)
 669                        __net_timestamp(skb);
 670
 671                tstamp->start = ktime_to_ns(skb->tstamp);
 672        }
 673        /* Since the lookup is lockless, hash insertion must be done after
 674         * starting the timer and setting the CONFIRMED bit. The RCU barriers
 675         * guarantee that no other CPU can find the conntrack before the above
 676         * stores are visible.
 677         */
 678        __nf_conntrack_hash_insert(ct, hash, reply_hash);
 679        nf_conntrack_double_unlock(hash, reply_hash);
 680        NF_CT_STAT_INC(net, insert);
 681        local_bh_enable();
 682
 683        help = nfct_help(ct);
 684        if (help && help->helper)
 685                nf_conntrack_event_cache(IPCT_HELPER, ct);
 686
 687        nf_conntrack_event_cache(master_ct(ct) ?
 688                                 IPCT_RELATED : IPCT_NEW, ct);
 689        return NF_ACCEPT;
 690
 691out:
 692        nf_ct_add_to_dying_list(ct);
 693        nf_conntrack_double_unlock(hash, reply_hash);
 694        NF_CT_STAT_INC(net, insert_failed);
 695        local_bh_enable();
 696        return NF_DROP;
 697}
 698EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
 699
  700/* Returns true if a connection corresponds to the tuple (required
  701   for NAT). */
 702int
 703nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 704                         const struct nf_conn *ignored_conntrack)
 705{
 706        struct net *net = nf_ct_net(ignored_conntrack);
 707        const struct nf_conntrack_zone *zone;
 708        struct nf_conntrack_tuple_hash *h;
 709        struct hlist_nulls_node *n;
 710        struct nf_conn *ct;
 711        unsigned int hash;
 712
 713        zone = nf_ct_zone(ignored_conntrack);
 714        hash = hash_conntrack(net, tuple);
 715
 716        /* Disable BHs the entire time since we need to disable them at
 717         * least once for the stats anyway.
 718         */
 719        rcu_read_lock_bh();
 720        hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
 721                ct = nf_ct_tuplehash_to_ctrack(h);
 722                if (ct != ignored_conntrack &&
 723                    nf_ct_tuple_equal(tuple, &h->tuple) &&
 724                    nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h))) {
 725                        NF_CT_STAT_INC(net, found);
 726                        rcu_read_unlock_bh();
 727                        return 1;
 728                }
 729                NF_CT_STAT_INC(net, searched);
 730        }
 731        rcu_read_unlock_bh();
 732
 733        return 0;
 734}
 735EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
 736
 737#define NF_CT_EVICTION_RANGE    8
 738
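/* Eviction when the table is full: starting at the bucket the new
 * connection hashes to, scan up to NF_CT_EVICTION_RANGE entries across
 * consecutive buckets and drop the first one found that is neither
 * assured nor already dying, taking a reference before deleting it.
 */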
 739/* There's a small race here where we may free a just-assured
 740   connection.  Too bad: we're in trouble anyway. */
 741static noinline int early_drop(struct net *net, unsigned int _hash)
 742{
 743        /* Use oldest entry, which is roughly LRU */
 744        struct nf_conntrack_tuple_hash *h;
 745        struct nf_conn *ct = NULL, *tmp;
 746        struct hlist_nulls_node *n;
 747        unsigned int i = 0, cnt = 0;
 748        int dropped = 0;
 749        unsigned int hash, sequence;
 750        spinlock_t *lockp;
 751
 752        local_bh_disable();
 753restart:
 754        sequence = read_seqcount_begin(&net->ct.generation);
 755        hash = hash_bucket(_hash, net);
 756        for (; i < net->ct.htable_size; i++) {
 757                lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
 758                spin_lock(lockp);
 759                if (read_seqcount_retry(&net->ct.generation, sequence)) {
 760                        spin_unlock(lockp);
 761                        goto restart;
 762                }
 763                hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
 764                                         hnnode) {
 765                        tmp = nf_ct_tuplehash_to_ctrack(h);
 766                        if (!test_bit(IPS_ASSURED_BIT, &tmp->status) &&
 767                            !nf_ct_is_dying(tmp) &&
 768                            atomic_inc_not_zero(&tmp->ct_general.use)) {
 769                                ct = tmp;
 770                                break;
 771                        }
 772                        cnt++;
 773                }
 774
 775                hash = (hash + 1) % net->ct.htable_size;
 776                spin_unlock(lockp);
 777
 778                if (ct || cnt >= NF_CT_EVICTION_RANGE)
 779                        break;
 780
 781        }
 782        local_bh_enable();
 783
 784        if (!ct)
 785                return dropped;
 786
 787        if (del_timer(&ct->timeout)) {
 788                if (nf_ct_delete(ct, 0, 0)) {
 789                        dropped = 1;
 790                        NF_CT_STAT_INC_ATOMIC(net, early_drop);
 791                }
 792        }
 793        nf_ct_put(ct);
 794        return dropped;
 795}
 796
 797void init_nf_conntrack_hash_rnd(void)
 798{
 799        unsigned int rand;
 800
  801        /*
  802         * Why not initialize nf_conntrack_hash_rnd in an init() function?
  803         * Because there isn't enough entropy while the system is initializing,
  804         * so we initialize it as late as possible.
  805         */
 806        do {
 807                get_random_bytes(&rand, sizeof(rand));
 808        } while (!rand);
 809        cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
 810}
 811
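/* Allocation: net->ct.count is bumped before the limit check so that
 * concurrent allocators cannot all slip under nf_conntrack_max; if the
 * limit is exceeded and early_drop() cannot make room, the packet is
 * dropped.  The object comes from a SLAB_DESTROY_BY_RCU cache, so the
 * relevant fields are cleared by hand rather than with
 * kmem_cache_zalloc(), the refcount stays zero until the entry is put
 * on a list, and the precomputed tuple hash is stashed in the reply
 * tuplehash for reuse at confirmation time.
 */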
 812static struct nf_conn *
 813__nf_conntrack_alloc(struct net *net,
 814                     const struct nf_conntrack_zone *zone,
 815                     const struct nf_conntrack_tuple *orig,
 816                     const struct nf_conntrack_tuple *repl,
 817                     gfp_t gfp, u32 hash)
 818{
 819        struct nf_conn *ct;
 820
 821        if (unlikely(!nf_conntrack_hash_rnd)) {
 822                init_nf_conntrack_hash_rnd();
  823                /* recompute the hash now that nf_conntrack_hash_rnd is initialized */
 824                hash = hash_conntrack_raw(orig);
 825        }
 826
 827        /* We don't want any race condition at early drop stage */
 828        atomic_inc(&net->ct.count);
 829
 830        if (nf_conntrack_max &&
 831            unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
 832                if (!early_drop(net, hash)) {
 833                        atomic_dec(&net->ct.count);
 834                        net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
 835                        return ERR_PTR(-ENOMEM);
 836                }
 837        }
 838
 839        /*
 840         * Do not use kmem_cache_zalloc(), as this cache uses
 841         * SLAB_DESTROY_BY_RCU.
 842         */
 843        ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp);
 844        if (ct == NULL)
 845                goto out;
 846
 847        spin_lock_init(&ct->lock);
 848        ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
 849        ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
 850        ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
 851        /* save hash for reusing when confirming */
 852        *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
 853        ct->status = 0;
 854        /* Don't set timer yet: wait for confirmation */
 855        setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
 856        write_pnet(&ct->ct_net, net);
 857        memset(&ct->__nfct_init_offset[0], 0,
 858               offsetof(struct nf_conn, proto) -
 859               offsetof(struct nf_conn, __nfct_init_offset[0]));
 860
 861        if (zone && nf_ct_zone_add(ct, GFP_ATOMIC, zone) < 0)
 862                goto out_free;
 863
 864        /* Because we use RCU lookups, we set ct_general.use to zero before
 865         * this is inserted in any list.
 866         */
 867        atomic_set(&ct->ct_general.use, 0);
 868        return ct;
 869out_free:
 870        kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
 871out:
 872        atomic_dec(&net->ct.count);
 873        return ERR_PTR(-ENOMEM);
 874}
 875
 876struct nf_conn *nf_conntrack_alloc(struct net *net,
 877                                   const struct nf_conntrack_zone *zone,
 878                                   const struct nf_conntrack_tuple *orig,
 879                                   const struct nf_conntrack_tuple *repl,
 880                                   gfp_t gfp)
 881{
 882        return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
 883}
 884EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
 885
 886void nf_conntrack_free(struct nf_conn *ct)
 887{
 888        struct net *net = nf_ct_net(ct);
 889
 890        /* A freed object has refcnt == 0, that's
 891         * the golden rule for SLAB_DESTROY_BY_RCU
 892         */
 893        NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
 894
 895        nf_ct_ext_destroy(ct);
 896        nf_ct_ext_free(ct);
 897        kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
 898        smp_mb__before_atomic();
 899        atomic_dec(&net->ct.count);
 900}
 901EXPORT_SYMBOL_GPL(nf_conntrack_free);
 902
 903
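/* init_conntrack() below builds a fresh entry for an unknown tuple: it
 * inverts the tuple for the reply direction, allocates the conntrack,
 * attaches the optional extensions (seqadj/synproxy from a template,
 * timeout policy, accounting, timestamps, labels, event cache), lets the
 * L4 tracker vet the first packet, and checks whether the packet matches
 * a registered expectation, in which case the master, helper, mark and
 * secmark are inherited.  Finally the entry is placed on the per-cpu
 * unconfirmed list with its reference count bumped.
 */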
 904/* Allocate a new conntrack: we return -ENOMEM if classification
 905   failed due to stress.  Otherwise it really is unclassifiable. */
 906static struct nf_conntrack_tuple_hash *
 907init_conntrack(struct net *net, struct nf_conn *tmpl,
 908               const struct nf_conntrack_tuple *tuple,
 909               struct nf_conntrack_l3proto *l3proto,
 910               struct nf_conntrack_l4proto *l4proto,
 911               struct sk_buff *skb,
 912               unsigned int dataoff, u32 hash)
 913{
 914        struct nf_conn *ct;
 915        struct nf_conn_help *help;
 916        struct nf_conntrack_tuple repl_tuple;
 917        struct nf_conntrack_ecache *ecache;
 918        struct nf_conntrack_expect *exp = NULL;
 919        const struct nf_conntrack_zone *zone;
 920        struct nf_conn_timeout *timeout_ext;
 921        struct nf_conntrack_zone tmp;
 922        unsigned int *timeouts;
 923
 924        if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
 925                pr_debug("Can't invert tuple.\n");
 926                return NULL;
 927        }
 928
 929        zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
 930        ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
 931                                  hash);
 932        if (IS_ERR(ct))
 933                return (struct nf_conntrack_tuple_hash *)ct;
 934
 935        if (tmpl && nfct_synproxy(tmpl)) {
 936                nfct_seqadj_ext_add(ct);
 937                nfct_synproxy_ext_add(ct);
 938        }
 939
 940        timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
 941        if (timeout_ext)
 942                timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext);
 943        else
 944                timeouts = l4proto->get_timeouts(net);
 945
 946        if (!l4proto->new(ct, skb, dataoff, timeouts)) {
 947                nf_conntrack_free(ct);
 948                pr_debug("init conntrack: can't track with proto module\n");
 949                return NULL;
 950        }
 951
 952        if (timeout_ext)
 953                nf_ct_timeout_ext_add(ct, timeout_ext->timeout, GFP_ATOMIC);
 954
 955        nf_ct_acct_ext_add(ct, GFP_ATOMIC);
 956        nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
 957        nf_ct_labels_ext_add(ct);
 958
 959        ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
 960        nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
 961                                 ecache ? ecache->expmask : 0,
 962                             GFP_ATOMIC);
 963
 964        local_bh_disable();
 965        if (net->ct.expect_count) {
 966                spin_lock(&nf_conntrack_expect_lock);
 967                exp = nf_ct_find_expectation(net, zone, tuple);
 968                if (exp) {
 969                        pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
 970                                 ct, exp);
 971                        /* Welcome, Mr. Bond.  We've been expecting you... */
 972                        __set_bit(IPS_EXPECTED_BIT, &ct->status);
 973                        /* exp->master safe, refcnt bumped in nf_ct_find_expectation */
 974                        ct->master = exp->master;
 975                        if (exp->helper) {
 976                                help = nf_ct_helper_ext_add(ct, exp->helper,
 977                                                            GFP_ATOMIC);
 978                                if (help)
 979                                        rcu_assign_pointer(help->helper, exp->helper);
 980                        }
 981
 982#ifdef CONFIG_NF_CONNTRACK_MARK
 983                        ct->mark = exp->master->mark;
 984#endif
 985#ifdef CONFIG_NF_CONNTRACK_SECMARK
 986                        ct->secmark = exp->master->secmark;
 987#endif
 988                        NF_CT_STAT_INC(net, expect_new);
 989                }
 990                spin_unlock(&nf_conntrack_expect_lock);
 991        }
 992        if (!exp) {
 993                __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
 994                NF_CT_STAT_INC(net, new);
 995        }
 996
  997        /* Bump the refcount; the entry now goes onto the unconfirmed list. */
 998        nf_conntrack_get(&ct->ct_general);
 999        nf_ct_add_to_unconfirmed_list(ct);
1000
1001        local_bh_enable();
1002
1003        if (exp) {
1004                if (exp->expectfn)
1005                        exp->expectfn(ct, exp);
1006                nf_ct_expect_put(exp);
1007        }
1008
1009        return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
1010}
1011
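/* resolve_normal_ct() maps a packet to a conntrack and a ctinfo value:
 * a reply-direction match gives IP_CT_ESTABLISHED_REPLY (and asks the
 * caller to set the SEEN_REPLY bit); in the original direction the state
 * is IP_CT_ESTABLISHED once a reply has been seen, IP_CT_RELATED for an
 * expected connection, and IP_CT_NEW otherwise.
 */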
1012/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
1013static inline struct nf_conn *
1014resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
1015                  struct sk_buff *skb,
1016                  unsigned int dataoff,
1017                  u_int16_t l3num,
1018                  u_int8_t protonum,
1019                  struct nf_conntrack_l3proto *l3proto,
1020                  struct nf_conntrack_l4proto *l4proto,
1021                  int *set_reply,
1022                  enum ip_conntrack_info *ctinfo)
1023{
1024        const struct nf_conntrack_zone *zone;
1025        struct nf_conntrack_tuple tuple;
1026        struct nf_conntrack_tuple_hash *h;
1027        struct nf_conntrack_zone tmp;
1028        struct nf_conn *ct;
1029        u32 hash;
1030
1031        if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
1032                             dataoff, l3num, protonum, &tuple, l3proto,
1033                             l4proto)) {
1034                pr_debug("resolve_normal_ct: Can't get tuple\n");
1035                return NULL;
1036        }
1037
1038        /* look for tuple match */
1039        zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
1040        hash = hash_conntrack_raw(&tuple);
1041        h = __nf_conntrack_find_get(net, zone, &tuple, hash);
1042        if (!h) {
1043                h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
1044                                   skb, dataoff, hash);
1045                if (!h)
1046                        return NULL;
1047                if (IS_ERR(h))
1048                        return (void *)h;
1049        }
1050        ct = nf_ct_tuplehash_to_ctrack(h);
1051
1052        /* It exists; we have (non-exclusive) reference. */
1053        if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
1054                *ctinfo = IP_CT_ESTABLISHED_REPLY;
 1055                /* Please set the reply bit if this packet is OK. */
1056                *set_reply = 1;
1057        } else {
1058                /* Once we've had two way comms, always ESTABLISHED. */
1059                if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1060                        pr_debug("nf_conntrack_in: normal packet for %p\n", ct);
1061                        *ctinfo = IP_CT_ESTABLISHED;
1062                } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
1063                        pr_debug("nf_conntrack_in: related packet for %p\n",
1064                                 ct);
1065                        *ctinfo = IP_CT_RELATED;
1066                } else {
1067                        pr_debug("nf_conntrack_in: new packet for %p\n", ct);
1068                        *ctinfo = IP_CT_NEW;
1069                }
1070                *set_reply = 0;
1071        }
1072        skb->nfct = &ct->ct_general;
1073        skb->nfctinfo = *ctinfo;
1074        return ct;
1075}
1076
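/* nf_conntrack_in() is the main hook entry point.  Packets that already
 * carry a non-template conntrack (loopback or the untracked entry) are
 * ignored; otherwise the L3/L4 protocol handlers are looked up, the L4
 * error handler may reject malformed packets or attach a conntrack
 * itself (ICMP errors), the connection is resolved or created, and
 * l4proto->packet() updates the protocol state and timeout.  NF_REPEAT
 * re-attaches the template so the hook can run again for the same skb.
 */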
1077unsigned int
1078nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
1079                struct sk_buff *skb)
1080{
1081        struct nf_conn *ct, *tmpl = NULL;
1082        enum ip_conntrack_info ctinfo;
1083        struct nf_conntrack_l3proto *l3proto;
1084        struct nf_conntrack_l4proto *l4proto;
1085        unsigned int *timeouts;
1086        unsigned int dataoff;
1087        u_int8_t protonum;
1088        int set_reply = 0;
1089        int ret;
1090
1091        if (skb->nfct) {
1092                /* Previously seen (loopback or untracked)?  Ignore. */
1093                tmpl = (struct nf_conn *)skb->nfct;
1094                if (!nf_ct_is_template(tmpl)) {
1095                        NF_CT_STAT_INC_ATOMIC(net, ignore);
1096                        return NF_ACCEPT;
1097                }
1098                skb->nfct = NULL;
1099        }
1100
1101        /* rcu_read_lock()ed by nf_hook_slow */
1102        l3proto = __nf_ct_l3proto_find(pf);
1103        ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
1104                                   &dataoff, &protonum);
1105        if (ret <= 0) {
1106                pr_debug("not prepared to track yet or error occurred\n");
1107                NF_CT_STAT_INC_ATOMIC(net, error);
1108                NF_CT_STAT_INC_ATOMIC(net, invalid);
1109                ret = -ret;
1110                goto out;
1111        }
1112
1113        l4proto = __nf_ct_l4proto_find(pf, protonum);
1114
 1115        /* It may be a special packet, error, unclean...  The
 1116         * inverse of the return code tells the netfilter
 1117         * core what to do with the packet. */
1118        if (l4proto->error != NULL) {
1119                ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo,
1120                                     pf, hooknum);
1121                if (ret <= 0) {
1122                        NF_CT_STAT_INC_ATOMIC(net, error);
1123                        NF_CT_STAT_INC_ATOMIC(net, invalid);
1124                        ret = -ret;
1125                        goto out;
1126                }
1127                /* ICMP[v6] protocol trackers may assign one conntrack. */
1128                if (skb->nfct)
1129                        goto out;
1130        }
1131
1132        ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
1133                               l3proto, l4proto, &set_reply, &ctinfo);
1134        if (!ct) {
1135                /* Not valid part of a connection */
1136                NF_CT_STAT_INC_ATOMIC(net, invalid);
1137                ret = NF_ACCEPT;
1138                goto out;
1139        }
1140
1141        if (IS_ERR(ct)) {
1142                /* Too stressed to deal. */
1143                NF_CT_STAT_INC_ATOMIC(net, drop);
1144                ret = NF_DROP;
1145                goto out;
1146        }
1147
1148        NF_CT_ASSERT(skb->nfct);
1149
1150        /* Decide what timeout policy we want to apply to this flow. */
1151        timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
1152
1153        ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
1154        if (ret <= 0) {
1155                /* Invalid: inverse of the return code tells
1156                 * the netfilter core what to do */
1157                pr_debug("nf_conntrack_in: Can't track with proto module\n");
1158                nf_conntrack_put(skb->nfct);
1159                skb->nfct = NULL;
1160                NF_CT_STAT_INC_ATOMIC(net, invalid);
1161                if (ret == -NF_DROP)
1162                        NF_CT_STAT_INC_ATOMIC(net, drop);
1163                ret = -ret;
1164                goto out;
1165        }
1166
1167        if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
1168                nf_conntrack_event_cache(IPCT_REPLY, ct);
1169out:
1170        if (tmpl) {
1171                /* Special case: we have to repeat this hook, assign the
1172                 * template again to this packet. We assume that this packet
1173                 * has no conntrack assigned. This is used by nf_ct_tcp. */
1174                if (ret == NF_REPEAT)
1175                        skb->nfct = (struct nf_conntrack *)tmpl;
1176                else
1177                        nf_ct_put(tmpl);
1178        }
1179
1180        return ret;
1181}
1182EXPORT_SYMBOL_GPL(nf_conntrack_in);
1183
1184bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
1185                          const struct nf_conntrack_tuple *orig)
1186{
1187        bool ret;
1188
1189        rcu_read_lock();
1190        ret = nf_ct_invert_tuple(inverse, orig,
1191                                 __nf_ct_l3proto_find(orig->src.l3num),
1192                                 __nf_ct_l4proto_find(orig->src.l3num,
1193                                                      orig->dst.protonum));
1194        rcu_read_unlock();
1195        return ret;
1196}
1197EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
1198
1199/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
1200   implicitly racy: see __nf_conntrack_confirm */
1201void nf_conntrack_alter_reply(struct nf_conn *ct,
1202                              const struct nf_conntrack_tuple *newreply)
1203{
1204        struct nf_conn_help *help = nfct_help(ct);
1205
1206        /* Should be unconfirmed, so not in hash table yet */
1207        NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
1208
1209        pr_debug("Altering reply tuple of %p to ", ct);
1210        nf_ct_dump_tuple(newreply);
1211
1212        ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
1213        if (ct->master || (help && !hlist_empty(&help->expectations)))
1214                return;
1215
1216        rcu_read_lock();
1217        __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC);
1218        rcu_read_unlock();
1219}
1220EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
1221
1222/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
1223void __nf_ct_refresh_acct(struct nf_conn *ct,
1224                          enum ip_conntrack_info ctinfo,
1225                          const struct sk_buff *skb,
1226                          unsigned long extra_jiffies,
1227                          int do_acct)
1228{
1229        NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
1230        NF_CT_ASSERT(skb);
1231
1232        /* Only update if this is not a fixed timeout */
1233        if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
1234                goto acct;
1235
1236        /* If not in hash table, timer will not be active yet */
1237        if (!nf_ct_is_confirmed(ct)) {
1238                ct->timeout.expires = extra_jiffies;
1239        } else {
1240                unsigned long newtime = jiffies + extra_jiffies;
1241
1242                /* Only update the timeout if the new timeout is at least
1243                   HZ jiffies from the old timeout. Need del_timer for race
1244                   avoidance (may already be dying). */
1245                if (newtime - ct->timeout.expires >= HZ)
1246                        mod_timer_pending(&ct->timeout, newtime);
1247        }
1248
1249acct:
1250        if (do_acct) {
1251                struct nf_conn_acct *acct;
1252
1253                acct = nf_conn_acct_find(ct);
1254                if (acct) {
1255                        struct nf_conn_counter *counter = acct->counter;
1256
1257                        atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
1258                        atomic64_add(skb->len, &counter[CTINFO2DIR(ctinfo)].bytes);
1259                }
1260        }
1261}
1262EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
1263
1264bool __nf_ct_kill_acct(struct nf_conn *ct,
1265                       enum ip_conntrack_info ctinfo,
1266                       const struct sk_buff *skb,
1267                       int do_acct)
1268{
1269        if (do_acct) {
1270                struct nf_conn_acct *acct;
1271
1272                acct = nf_conn_acct_find(ct);
1273                if (acct) {
1274                        struct nf_conn_counter *counter = acct->counter;
1275
1276                        atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
1277                        atomic64_add(skb->len - skb_network_offset(skb),
1278                                     &counter[CTINFO2DIR(ctinfo)].bytes);
1279                }
1280        }
1281
1282        if (del_timer(&ct->timeout)) {
1283                ct->timeout.function((unsigned long)ct);
1284                return true;
1285        }
1286        return false;
1287}
1288EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
1289
1290#ifdef CONFIG_NF_CONNTRACK_ZONES
1291static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = {
1292        .len    = sizeof(struct nf_conntrack_zone),
1293        .align  = __alignof__(struct nf_conntrack_zone),
1294        .id     = NF_CT_EXT_ZONE,
1295};
1296#endif
1297
1298#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1299
1300#include <linux/netfilter/nfnetlink.h>
1301#include <linux/netfilter/nfnetlink_conntrack.h>
1302#include <linux/mutex.h>
1303
 1304/* Generic function for tcp/udp/sctp/dccp and the like. This needs to be
 1305 * in nf_conntrack_core, since we don't want the protocols to autoload
 1306 * or depend on ctnetlink. */
1307int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
1308                               const struct nf_conntrack_tuple *tuple)
1309{
1310        if (nla_put_be16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port) ||
1311            nla_put_be16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port))
1312                goto nla_put_failure;
1313        return 0;
1314
1315nla_put_failure:
1316        return -1;
1317}
1318EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr);
1319
1320const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = {
1321        [CTA_PROTO_SRC_PORT]  = { .type = NLA_U16 },
1322        [CTA_PROTO_DST_PORT]  = { .type = NLA_U16 },
1323};
1324EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy);
1325
1326int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
1327                               struct nf_conntrack_tuple *t)
1328{
1329        if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT])
1330                return -EINVAL;
1331
1332        t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
1333        t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);
1334
1335        return 0;
1336}
1337EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
1338
1339int nf_ct_port_nlattr_tuple_size(void)
1340{
1341        return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1342}
1343EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
1344#endif
1345
1346/* Used by ipt_REJECT and ip6t_REJECT. */
1347static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
1348{
1349        struct nf_conn *ct;
1350        enum ip_conntrack_info ctinfo;
1351
1352        /* This ICMP is in reverse direction to the packet which caused it */
1353        ct = nf_ct_get(skb, &ctinfo);
1354        if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1355                ctinfo = IP_CT_RELATED_REPLY;
1356        else
1357                ctinfo = IP_CT_RELATED;
1358
1359        /* Attach to new skbuff, and increment count */
1360        nskb->nfct = &ct->ct_general;
1361        nskb->nfctinfo = ctinfo;
1362        nf_conntrack_get(nskb->nfct);
1363}
1364
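/* get_next_corpse() walks both the hash table (original-direction
 * entries only, so each connection is visited once) and the per-cpu
 * unconfirmed lists.  Hash entries matching the iterator are returned
 * with a reference held for the caller to kill; unconfirmed entries are
 * merely flagged IPS_DYING, since they will be dropped at confirmation
 * time.
 */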
1365/* Bring out ya dead! */
1366static struct nf_conn *
1367get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
1368                void *data, unsigned int *bucket)
1369{
1370        struct nf_conntrack_tuple_hash *h;
1371        struct nf_conn *ct;
1372        struct hlist_nulls_node *n;
1373        int cpu;
1374        spinlock_t *lockp;
1375
1376        for (; *bucket < net->ct.htable_size; (*bucket)++) {
1377                lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
1378                local_bh_disable();
1379                spin_lock(lockp);
1380                if (*bucket < net->ct.htable_size) {
1381                        hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
1382                                if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
1383                                        continue;
1384                                ct = nf_ct_tuplehash_to_ctrack(h);
1385                                if (iter(ct, data))
1386                                        goto found;
1387                        }
1388                }
1389                spin_unlock(lockp);
1390                local_bh_enable();
1391        }
1392
1393        for_each_possible_cpu(cpu) {
1394                struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
1395
1396                spin_lock_bh(&pcpu->lock);
1397                hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) {
1398                        ct = nf_ct_tuplehash_to_ctrack(h);
1399                        if (iter(ct, data))
1400                                set_bit(IPS_DYING_BIT, &ct->status);
1401                }
1402                spin_unlock_bh(&pcpu->lock);
1403        }
1404        return NULL;
1405found:
1406        atomic_inc(&ct->ct_general.use);
1407        spin_unlock(lockp);
1408        local_bh_enable();
1409        return ct;
1410}
1411
1412void nf_ct_iterate_cleanup(struct net *net,
1413                           int (*iter)(struct nf_conn *i, void *data),
1414                           void *data, u32 portid, int report)
1415{
1416        struct nf_conn *ct;
1417        unsigned int bucket = 0;
1418
1419        while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
 1420                /* Time to push up daisies... */
1421                if (del_timer(&ct->timeout))
1422                        nf_ct_delete(ct, portid, report);
1423
1424                /* ... else the timer will get him soon. */
1425
1426                nf_ct_put(ct);
1427        }
1428}
1429EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
1430
1431static int kill_all(struct nf_conn *i, void *data)
1432{
1433        return 1;
1434}
1435
1436void nf_ct_free_hashtable(void *hash, unsigned int size)
1437{
1438        if (is_vmalloc_addr(hash))
1439                vfree(hash);
1440        else
1441                free_pages((unsigned long)hash,
1442                           get_order(sizeof(struct hlist_head) * size));
1443}
1444EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
1445
1446static int untrack_refs(void)
1447{
1448        int cnt = 0, cpu;
1449
1450        for_each_possible_cpu(cpu) {
1451                struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
1452
1453                cnt += atomic_read(&ct->ct_general.use) - 1;
1454        }
1455        return cnt;
1456}
1457
1458void nf_conntrack_cleanup_start(void)
1459{
1460        RCU_INIT_POINTER(ip_ct_attach, NULL);
1461}
1462
1463void nf_conntrack_cleanup_end(void)
1464{
1465        RCU_INIT_POINTER(nf_ct_destroy, NULL);
1466        while (untrack_refs() > 0)
1467                schedule();
1468
1469#ifdef CONFIG_NF_CONNTRACK_ZONES
1470        nf_ct_extend_unregister(&nf_ct_zone_extend);
1471#endif
1472        nf_conntrack_proto_fini();
1473        nf_conntrack_seqadj_fini();
1474        nf_conntrack_labels_fini();
1475        nf_conntrack_helper_fini();
1476        nf_conntrack_timeout_fini();
1477        nf_conntrack_ecache_fini();
1478        nf_conntrack_tstamp_fini();
1479        nf_conntrack_acct_fini();
1480        nf_conntrack_expect_fini();
1481}
1482
1483/*
1484 * Mishearing the voices in his head, our hero wonders how he's
1485 * supposed to kill the mall.
1486 */
1487void nf_conntrack_cleanup_net(struct net *net)
1488{
1489        LIST_HEAD(single);
1490
1491        list_add(&net->exit_list, &single);
1492        nf_conntrack_cleanup_net_list(&single);
1493}
1494
1495void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
1496{
1497        int busy;
1498        struct net *net;
1499
1500        /*
1501         * This makes sure all current packets have passed through
1502         * the netfilter framework.  Roll on, two-stage module
1503         * delete...
1504         */
1505        synchronize_net();
1506i_see_dead_people:
1507        busy = 0;
1508        list_for_each_entry(net, net_exit_list, exit_list) {
1509                nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
1510                if (atomic_read(&net->ct.count) != 0)
1511                        busy = 1;
1512        }
1513        if (busy) {
1514                schedule();
1515                goto i_see_dead_people;
1516        }
1517
1518        list_for_each_entry(net, net_exit_list, exit_list) {
1519                nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
1520                nf_conntrack_proto_pernet_fini(net);
1521                nf_conntrack_helper_pernet_fini(net);
1522                nf_conntrack_ecache_pernet_fini(net);
1523                nf_conntrack_tstamp_pernet_fini(net);
1524                nf_conntrack_acct_pernet_fini(net);
1525                nf_conntrack_expect_pernet_fini(net);
1526                kmem_cache_destroy(net->ct.nf_conntrack_cachep);
1527                kfree(net->ct.slabname);
1528                free_percpu(net->ct.stat);
1529                free_percpu(net->ct.pcpu_lists);
1530        }
1531}
1532
1533void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
1534{
1535        struct hlist_nulls_head *hash;
1536        unsigned int nr_slots, i;
1537        size_t sz;
1538
1539        BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
1540        nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
1541        sz = nr_slots * sizeof(struct hlist_nulls_head);
1542        hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1543                                        get_order(sz));
1544        if (!hash)
1545                hash = vzalloc(sz);
1546
1547        if (hash && nulls)
1548                for (i = 0; i < nr_slots; i++)
1549                        INIT_HLIST_NULLS_HEAD(&hash[i], i);
1550
1551        return hash;
1552}
1553EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
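/* A quick check of the rounding above, assuming 4 KiB pages and 8-byte
 * hlist_nulls_head slots: *sizep is rounded up to a multiple of 512, so
 * a request for 1000 buckets becomes 1024 and sz = 8192 bytes, i.e. two
 * whole pages from __get_free_pages() (or vzalloc() as the fallback).
 */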
1554
1555int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1556{
1557        int i, bucket, rc;
1558        unsigned int hashsize, old_size;
1559        struct hlist_nulls_head *hash, *old_hash;
1560        struct nf_conntrack_tuple_hash *h;
1561        struct nf_conn *ct;
1562
1563        if (current->nsproxy->net_ns != &init_net)
1564                return -EOPNOTSUPP;
1565
1566        /* On boot, we can set this without any fancy locking. */
1567        if (!nf_conntrack_htable_size)
1568                return param_set_uint(val, kp);
1569
1570        rc = kstrtouint(val, 0, &hashsize);
1571        if (rc)
1572                return rc;
1573        if (!hashsize)
1574                return -EINVAL;
1575
1576        hash = nf_ct_alloc_hashtable(&hashsize, 1);
1577        if (!hash)
1578                return -ENOMEM;
1579
1580        local_bh_disable();
1581        nf_conntrack_all_lock();
1582        write_seqcount_begin(&init_net.ct.generation);
1583
1584        /* Lookups in the old hash might happen in parallel, which means we
1585         * might get false negatives during connection lookup. New connections
1586         * created because of a false negative won't make it into the hash,
1587         * though, since that requires taking the locks.
1588         */
1589
1590        for (i = 0; i < init_net.ct.htable_size; i++) {
1591                while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
1592                        h = hlist_nulls_entry(init_net.ct.hash[i].first,
1593                                        struct nf_conntrack_tuple_hash, hnnode);
1594                        ct = nf_ct_tuplehash_to_ctrack(h);
1595                        hlist_nulls_del_rcu(&h->hnnode);
1596                        bucket = __hash_conntrack(&h->tuple, hashsize);
1597                        hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
1598                }
1599        }
1600        old_size = init_net.ct.htable_size;
1601        old_hash = init_net.ct.hash;
1602
1603        init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
1604        init_net.ct.hash = hash;
1605
1606        write_seqcount_end(&init_net.ct.generation);
1607        nf_conntrack_all_unlock();
1608        local_bh_enable();
1609
1610        nf_ct_free_hashtable(old_hash, old_size);
1611        return 0;
1612}
1613EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
1614
1615module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
1616                  &nf_conntrack_htable_size, 0600);
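/* With the 0600 permission above, the hash can be resized at runtime by
 * writing to the module parameter (path assumes the usual module name):
 *
 *	echo 262144 > /sys/module/nf_conntrack/parameters/hashsize
 *
 * which lands in nf_conntrack_set_hashsize() and rehashes every entry
 * into the new table.
 */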
1617
1618void nf_ct_untracked_status_or(unsigned long bits)
1619{
1620        int cpu;
1621
1622        for_each_possible_cpu(cpu)
1623                per_cpu(nf_conntrack_untracked, cpu).status |= bits;
1624}
1625EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
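/* The per-cpu untracked conntrack is what gets attached to packets that
 * the administrator exempts from tracking, for example via the raw
 * table (illustrative rule):
 *
 *	iptables -t raw -A PREROUTING -p udp --dport 123 -j CT --notrack
 *
 * Such packets carry a conntrack reference but are never hashed or
 * aged out.
 */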
1626
1627int nf_conntrack_init_start(void)
1628{
1629        int max_factor = 8;
1630        int i, ret, cpu;
1631
1632        for (i = 0; i < CONNTRACK_LOCKS; i++)
1633                spin_lock_init(&nf_conntrack_locks[i]);
1634
1635        if (!nf_conntrack_htable_size) {
1636                /* Idea from tcp.c: use 1/16384 of memory.
1637                 * On i386: 32MB machine has 512 buckets.
1638                 * >= 1GB machines have 16384 buckets.
1639                 * >= 4GB machines have 65536 buckets.
1640                 */
1641                nf_conntrack_htable_size
1642                        = (((totalram_pages << PAGE_SHIFT) / 16384)
1643                           / sizeof(struct hlist_head));
1644                if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
1645                        nf_conntrack_htable_size = 65536;
1646                else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
1647                        nf_conntrack_htable_size = 16384;
1648                if (nf_conntrack_htable_size < 32)
1649                        nf_conntrack_htable_size = 32;
1650
1651                /* Use a max factor of four by default to get the same maximum
1652                 * as with the old struct list_heads.  When a table size is
1653                 * given explicitly, keep the old factor of 8 to avoid reducing
1654                 * the maximum number of entries. */
1655                max_factor = 4;
1656        }
1657        nf_conntrack_max = max_factor * nf_conntrack_htable_size;
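        /* Worked example, assuming 4 KiB pages: a machine with 8 GiB of
         * RAM has more than 4 GiB worth of pages, so the defaults above
         * give 65536 buckets and, with max_factor = 4, a limit of
         * nf_conntrack_max = 262144 tracked connections.
         */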
1658
1659        printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
1660               NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
1661               nf_conntrack_max);
1662
1663        ret = nf_conntrack_expect_init();
1664        if (ret < 0)
1665                goto err_expect;
1666
1667        ret = nf_conntrack_acct_init();
1668        if (ret < 0)
1669                goto err_acct;
1670
1671        ret = nf_conntrack_tstamp_init();
1672        if (ret < 0)
1673                goto err_tstamp;
1674
1675        ret = nf_conntrack_ecache_init();
1676        if (ret < 0)
1677                goto err_ecache;
1678
1679        ret = nf_conntrack_timeout_init();
1680        if (ret < 0)
1681                goto err_timeout;
1682
1683        ret = nf_conntrack_helper_init();
1684        if (ret < 0)
1685                goto err_helper;
1686
1687        ret = nf_conntrack_labels_init();
1688        if (ret < 0)
1689                goto err_labels;
1690
1691        ret = nf_conntrack_seqadj_init();
1692        if (ret < 0)
1693                goto err_seqadj;
1694
1695#ifdef CONFIG_NF_CONNTRACK_ZONES
1696        ret = nf_ct_extend_register(&nf_ct_zone_extend);
1697        if (ret < 0)
1698                goto err_extend;
1699#endif
1700        ret = nf_conntrack_proto_init();
1701        if (ret < 0)
1702                goto err_proto;
1703
1704        /* Set up the fake conntrack: never deleted, not in any hashes */
1705        for_each_possible_cpu(cpu) {
1706                struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
1707                write_pnet(&ct->ct_net, &init_net);
1708                atomic_set(&ct->ct_general.use, 1);
1709        }
1710        /* - and make it look like a confirmed connection */
1711        nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
1712        return 0;
1713
1714err_proto:
1715#ifdef CONFIG_NF_CONNTRACK_ZONES
1716        nf_ct_extend_unregister(&nf_ct_zone_extend);
1717err_extend:
1718#endif
1719        nf_conntrack_seqadj_fini();
1720err_seqadj:
1721        nf_conntrack_labels_fini();
1722err_labels:
1723        nf_conntrack_helper_fini();
1724err_helper:
1725        nf_conntrack_timeout_fini();
1726err_timeout:
1727        nf_conntrack_ecache_fini();
1728err_ecache:
1729        nf_conntrack_tstamp_fini();
1730err_tstamp:
1731        nf_conntrack_acct_fini();
1732err_acct:
1733        nf_conntrack_expect_fini();
1734err_expect:
1735        return ret;
1736}
1737
1738void nf_conntrack_init_end(void)
1739{
1740        /* For use by REJECT target */
1741        RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
1742        RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack);
1743}
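/* nf_conntrack_attach(), installed above as ip_ct_attach, is reached
 * through nf_ct_attach() so that locally generated replies such as TCP
 * resets or ICMP errors inherit the conntrack state of the packet that
 * triggered them, e.g.
 *
 *	nf_ct_attach(nskb, oldskb);
 */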
1744
1745/*
1746 * We need to use special "null" values, not used in the hash table
1747 */
1748#define UNCONFIRMED_NULLS_VAL   ((1<<30)+0)
1749#define DYING_NULLS_VAL         ((1<<30)+1)
1750#define TEMPLATE_NULLS_VAL      ((1<<30)+2)
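/* The main hash uses the bucket number as its nulls value, so the
 * values above (all >= 1 << 30) cannot collide with it.  A lockless
 * lookup that ends on an unexpected nulls marker knows the entry was
 * moved to another list and restarts, roughly (illustrative only):
 *
 *	if (get_nulls_value(n) != bucket)
 *		goto begin;
 */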
1751
1752int nf_conntrack_init_net(struct net *net)
1753{
1754        int ret = -ENOMEM;
1755        int cpu;
1756
1757        atomic_set(&net->ct.count, 0);
1758        seqcount_init(&net->ct.generation);
1759
1760        net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
1761        if (!net->ct.pcpu_lists)
1762                goto err_stat;
1763
1764        for_each_possible_cpu(cpu) {
1765                struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
1766
1767                spin_lock_init(&pcpu->lock);
1768                INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
1769                INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
1770        }
1771
1772        net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
1773        if (!net->ct.stat)
1774                goto err_pcpu_lists;
1775
1776        net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
1777        if (!net->ct.slabname)
1778                goto err_slabname;
1779
1780        net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
1781                                                        sizeof(struct nf_conn), 0,
1782                                                        SLAB_DESTROY_BY_RCU, NULL);
1783        if (!net->ct.nf_conntrack_cachep) {
1784                printk(KERN_ERR "Unable to create nf_conn slab cache\n");
1785                goto err_cache;
1786        }
1787
1788        net->ct.htable_size = nf_conntrack_htable_size;
1789        net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
1790        if (!net->ct.hash) {
1791                printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
1792                goto err_hash;
1793        }
1794        ret = nf_conntrack_expect_pernet_init(net);
1795        if (ret < 0)
1796                goto err_expect;
1797        ret = nf_conntrack_acct_pernet_init(net);
1798        if (ret < 0)
1799                goto err_acct;
1800        ret = nf_conntrack_tstamp_pernet_init(net);
1801        if (ret < 0)
1802                goto err_tstamp;
1803        ret = nf_conntrack_ecache_pernet_init(net);
1804        if (ret < 0)
1805                goto err_ecache;
1806        ret = nf_conntrack_helper_pernet_init(net);
1807        if (ret < 0)
1808                goto err_helper;
1809        ret = nf_conntrack_proto_pernet_init(net);
1810        if (ret < 0)
1811                goto err_proto;
1812        return 0;
1813
1814err_proto:
1815        nf_conntrack_helper_pernet_fini(net);
1816err_helper:
1817        nf_conntrack_ecache_pernet_fini(net);
1818err_ecache:
1819        nf_conntrack_tstamp_pernet_fini(net);
1820err_tstamp:
1821        nf_conntrack_acct_pernet_fini(net);
1822err_acct:
1823        nf_conntrack_expect_pernet_fini(net);
1824err_expect:
1825        nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
1826err_hash:
1827        kmem_cache_destroy(net->ct.nf_conntrack_cachep);
1828err_cache:
1829        kfree(net->ct.slabname);
1830err_slabname:
1831        free_percpu(net->ct.stat);
1832err_pcpu_lists:
1833        free_percpu(net->ct.pcpu_lists);
1834err_stat:
1835        return ret;
1836}
1837