linux/net/netfilter/nf_flow_table.c
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

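/*
 * Each flow is allocated inside this wrapper so that the conntrack
 * backpointer and the RCU head stay private to this file;
 * flow_offload_free() uses container_of() to get back from the
 * embedded flow to its entry.
 */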
struct flow_offload_entry {
        struct flow_offload     flow;
        struct nf_conn          *ct;
        struct rcu_head         rcu_head;
};

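/*
 * Allocate a flow from a conntrack entry and a pair of cached routes.
 * This takes its own references on the conntrack and on both dst
 * entries; on failure, everything acquired so far is released again.
 * GFP_ATOMIC is used presumably because callers can run in atomic
 * context on the packet path.
 */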
struct flow_offload *
flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
{
        struct flow_offload_entry *entry;
        struct flow_offload *flow;

        if (unlikely(nf_ct_is_dying(ct) ||
            !atomic_inc_not_zero(&ct->ct_general.use)))
                return NULL;

        entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
        if (!entry)
                goto err_ct_refcnt;

        flow = &entry->flow;

        if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
                goto err_dst_cache_original;

        if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
                goto err_dst_cache_reply;

        entry->ct = ct;

        switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) {
        case NFPROTO_IPV4:
                flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
                        ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
                flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
                        ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
                flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
                        ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
                flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
                        ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
                break;
        case NFPROTO_IPV6:
                flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
                        ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
                flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
                        ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
                flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
                        ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
                flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
                        ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
                break;
        }

        flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
                ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
        flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
                ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
        flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
                ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
        flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
                ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;

        flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
                route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
        flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
                route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;

        flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
                ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
        flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
                ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
        flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
                ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
        flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
                ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;

        flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
                                                FLOW_OFFLOAD_DIR_ORIGINAL;
        flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
                                                FLOW_OFFLOAD_DIR_REPLY;

        flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
                route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
        flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
                route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
        flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
                route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
        flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
                route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;

        /* A conntrack entry can have SNAT and DNAT applied at the same
         * time, so check both status bits independently rather than
         * with an else-if chain.
         */
        if (ct->status & IPS_SRC_NAT)
                flow->flags |= FLOW_OFFLOAD_SNAT;
        if (ct->status & IPS_DST_NAT)
                flow->flags |= FLOW_OFFLOAD_DNAT;

        return flow;

err_dst_cache_reply:
        dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
err_dst_cache_original:
        kfree(entry);
err_ct_refcnt:
        nf_ct_put(ct);

        return NULL;
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);

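/*
 * Drop both cached routes, kill and release the backing conntrack
 * entry, and free the flow after an RCU grace period so that
 * concurrent lookups never see freed memory.
 */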
void flow_offload_free(struct flow_offload *flow)
{
        struct flow_offload_entry *e;

        dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
        dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
        e = container_of(flow, struct flow_offload_entry, flow);
        nf_ct_delete(e->ct, 0, 0);
        nf_ct_put(e->ct);
        kfree_rcu(e, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_free);

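/* Mark a flow as dying; the next garbage collection pass frees it. */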
void flow_offload_dead(struct flow_offload *flow)
{
        flow->flags |= FLOW_OFFLOAD_DYING;
}
EXPORT_SYMBOL_GPL(flow_offload_dead);

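/*
 * Insert both directions of a flow into the same rhashtable so that a
 * lookup with either tuple finds the matching half together with its
 * direction.
 */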
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
        int err;

        flow->timeout = (u32)jiffies;

        err = rhashtable_insert_fast(&flow_table->rhashtable,
                                     &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
                                     *flow_table->type->params);
        if (err < 0)
                return err;

        /* Unwind the first insertion if the second one fails. */
        err = rhashtable_insert_fast(&flow_table->rhashtable,
                                     &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
                                     *flow_table->type->params);
        if (err < 0) {
                rhashtable_remove_fast(&flow_table->rhashtable,
                                       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
                                       *flow_table->type->params);
                return err;
        }

        return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);

static void flow_offload_del(struct nf_flowtable *flow_table,
                             struct flow_offload *flow)
{
        rhashtable_remove_fast(&flow_table->rhashtable,
                               &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
                               *flow_table->type->params);
        rhashtable_remove_fast(&flow_table->rhashtable,
                               &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
                               *flow_table->type->params);

        flow_offload_free(flow);
}

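/*
 * Look up one direction of a flow by tuple; the returned hash node
 * carries the direction in tuple.dir.
 */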
struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
                    struct flow_offload_tuple *tuple)
{
        return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
                                      *flow_table->type->params);
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);

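/*
 * Walk the whole table and call @iter once per flow. Every flow is
 * hashed twice (once per direction), so reply-direction entries are
 * skipped to avoid visiting the same flow twice. -EAGAIN from the
 * walker signals a table resize; the walk simply continues.
 */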
int nf_flow_table_iterate(struct nf_flowtable *flow_table,
                          void (*iter)(struct flow_offload *flow, void *data),
                          void *data)
{
        struct flow_offload_tuple_rhash *tuplehash;
        struct rhashtable_iter hti;
        struct flow_offload *flow;
        int err;

        err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
        if (err)
                return err;

        rhashtable_walk_start(&hti);

        while ((tuplehash = rhashtable_walk_next(&hti))) {
                if (IS_ERR(tuplehash)) {
                        err = PTR_ERR(tuplehash);
                        if (err != -EAGAIN)
                                goto out;

                        continue;
                }
                if (tuplehash->tuple.dir)
                        continue;

                flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

                iter(flow, data);
        }
out:
        rhashtable_walk_stop(&hti);
        rhashtable_walk_exit(&hti);

        return err;
}
EXPORT_SYMBOL_GPL(nf_flow_table_iterate);

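/*
 * flow->timeout holds an absolute jiffies value; the comparison is
 * done in signed 32-bit arithmetic so it stays correct across a
 * jiffies wrap.
 */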
static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
        return (__s32)(flow->timeout - (u32)jiffies) <= 0;
}

static inline bool nf_flow_is_dying(const struct flow_offload *flow)
{
        return flow->flags & FLOW_OFFLOAD_DYING;
}

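/*
 * One garbage-collection pass: remove and free all flows that have
 * expired or were marked dying. Returns 0 only when the walker could
 * not be set up; nf_flow_table_free() treats that as a bug.
 */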
static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
{
        struct flow_offload_tuple_rhash *tuplehash;
        struct rhashtable_iter hti;
        struct flow_offload *flow;
        int err;

        err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
        if (err)
                return 0;

        rhashtable_walk_start(&hti);

        while ((tuplehash = rhashtable_walk_next(&hti))) {
                if (IS_ERR(tuplehash)) {
                        err = PTR_ERR(tuplehash);
                        if (err != -EAGAIN)
                                goto out;

                        continue;
                }
                if (tuplehash->tuple.dir)
                        continue;

                flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

                if (nf_flow_has_expired(flow) ||
                    nf_flow_is_dying(flow))
                        flow_offload_del(flow_table, flow);
        }
out:
        rhashtable_walk_stop(&hti);
        rhashtable_walk_exit(&hti);

        return 1;
}

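/* Periodic GC worker; re-arms itself to run once per second (HZ). */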
void nf_flow_offload_work_gc(struct work_struct *work)
{
        struct nf_flowtable *flow_table;

        flow_table = container_of(work, struct nf_flowtable, gc_work.work);
        nf_flow_offload_gc_step(flow_table);
        queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);

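/*
 * Hashing and comparison cover only the tuple fields that sit in
 * front of 'dir', so stored tuples and lookup keys hash identically.
 * Note that memcmp() also sees padding, so the key memory is expected
 * to be zero-initialized (the stored entries are, via kzalloc above).
 */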
static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
        const struct flow_offload_tuple *tuple = data;

        return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
{
        const struct flow_offload_tuple_rhash *tuplehash = data;

        return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
                                 const void *ptr)
{
        const struct flow_offload_tuple *tuple = arg->key;
        const struct flow_offload_tuple_rhash *x = ptr;

        if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
                return 1;

        return 0;
}

const struct rhashtable_params nf_flow_offload_rhash_params = {
        .head_offset            = offsetof(struct flow_offload_tuple_rhash, node),
        .hashfn                 = flow_offload_hash,
        .obj_hashfn             = flow_offload_hash_obj,
        .obj_cmpfn              = flow_offload_hash_cmp,
        .automatic_shrinking    = true,
};
EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);

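/*
 * Checksum fixups for NAT port rewriting. The callers below mangle
 * the port in the header; these helpers only repair the transport
 * checksum accordingly.
 */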
static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
                                __be16 port, __be16 new_port)
{
        struct tcphdr *tcph;

        if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
            skb_try_make_writable(skb, thoff + sizeof(*tcph)))
                return -1;

        tcph = (void *)(skb_network_header(skb) + thoff);
        inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);

        return 0;
}

static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
                                __be16 port, __be16 new_port)
{
        struct udphdr *udph;

        if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
            skb_try_make_writable(skb, thoff + sizeof(*udph)))
                return -1;

        udph = (void *)(skb_network_header(skb) + thoff);
        if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
                inet_proto_csum_replace2(&udph->check, skb, port,
                                         new_port, true);
                if (!udph->check)
                        udph->check = CSUM_MANGLED_0;
        }

        return 0;
}

static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
                            u8 protocol, __be16 port, __be16 new_port)
{
        switch (protocol) {
        case IPPROTO_TCP:
                if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
                        return NF_DROP;
                break;
        case IPPROTO_UDP:
                if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
                        return NF_DROP;
                break;
        }

        return 0;
}

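/*
 * Apply source-NAT port mangling to a packet on an offloaded flow: in
 * the original direction the source port is rewritten, in the reply
 * direction the destination port. The replacement is taken from the
 * opposite tuple, and the checksum is fixed up afterwards.
 */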
int nf_flow_snat_port(const struct flow_offload *flow,
                      struct sk_buff *skb, unsigned int thoff,
                      u8 protocol, enum flow_offload_tuple_dir dir)
{
        struct flow_ports *hdr;
        __be16 port, new_port;

        if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
            skb_try_make_writable(skb, thoff + sizeof(*hdr)))
                return -1;

        hdr = (void *)(skb_network_header(skb) + thoff);

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                port = hdr->source;
                new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
                hdr->source = new_port;
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                port = hdr->dest;
                new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
                hdr->dest = new_port;
                break;
        default:
                return -1;
        }

        return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_snat_port);

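/*
 * Same as above for destination NAT: the destination port is mangled
 * in the original direction, the source port in the reply direction.
 */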
int nf_flow_dnat_port(const struct flow_offload *flow,
                      struct sk_buff *skb, unsigned int thoff,
                      u8 protocol, enum flow_offload_tuple_dir dir)
{
        struct flow_ports *hdr;
        __be16 port, new_port;

        if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
            skb_try_make_writable(skb, thoff + sizeof(*hdr)))
                return -1;

        hdr = (void *)(skb_network_header(skb) + thoff);

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                port = hdr->dest;
                new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
                hdr->dest = new_port;
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                port = hdr->source;
                new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
                hdr->source = new_port;
                break;
        default:
                return -1;
        }

        return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);

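/*
 * Device/netns teardown: mark flows as dying, either all of them or
 * only those whose ingress interface matches the given device, then
 * flush the GC work so they are actually gone before returning.
 */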
static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
{
        struct net_device *dev = data;

        if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
                return;

        flow_offload_dead(flow);
}

static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
                                          void *data)
{
        nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
        flush_delayed_work(&flowtable->gc_work);
}

void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
{
        nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);

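/*
 * Final teardown of a flow table: mark every remaining flow as dying
 * and run one GC step to free them all. The WARN_ON fires only if the
 * hashtable walk could not even be started.
 */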
void nf_flow_table_free(struct nf_flowtable *flow_table)
{
        nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
        WARN_ON(!nf_flow_offload_gc_step(flow_table));
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");