linux/net/netfilter/nf_flow_table_core.c
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ip6_route.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>

static DEFINE_MUTEX(flowtable_lock);
static LIST_HEAD(flowtables);
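/* Populate one direction of the flow tuple from the conntrack tuple:
 * addresses, L3/L4 protocol numbers and ports.
 */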
static void
flow_offload_fill_dir(struct flow_offload *flow,
                      enum flow_offload_tuple_dir dir)
{
        struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
        struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;

        ft->dir = dir;

        switch (ctt->src.l3num) {
        case NFPROTO_IPV4:
                ft->src_v4 = ctt->src.u3.in;
                ft->dst_v4 = ctt->dst.u3.in;
                break;
        case NFPROTO_IPV6:
                ft->src_v6 = ctt->src.u3.in6;
                ft->dst_v6 = ctt->dst.u3.in6;
                break;
        }

        ft->l3proto = ctt->src.l3num;
        ft->l4proto = ctt->dst.protonum;
        ft->src_port = ctt->src.u.tcp.port;
        ft->dst_port = ctt->dst.u.tcp.port;
}
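/* Allocate a flow_offload entry for @ct and fill both tuple directions.
 * Takes a reference on the conntrack entry; returns NULL if the entry is
 * dying, its refcount has already dropped to zero or the allocation fails.
 */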
struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
{
        struct flow_offload *flow;

        if (unlikely(nf_ct_is_dying(ct) ||
            !atomic_inc_not_zero(&ct->ct_general.use)))
                return NULL;

        flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
        if (!flow)
                goto err_ct_refcnt;

        flow->ct = ct;

        flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
        flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);

        if (ct->status & IPS_SRC_NAT)
                flow->flags |= FLOW_OFFLOAD_SNAT;
        if (ct->status & IPS_DST_NAT)
                flow->flags |= FLOW_OFFLOAD_DNAT;

        return flow;

err_ct_refcnt:
        nf_ct_put(ct);

        return NULL;
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);
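/* Cache the route information for one direction: hold a reference on the
 * dst, record the forwarding MTU and the expected input interface (taken
 * from the other direction's dst). flow_offload_route_init() does this for
 * both directions and marks the flow as a route type flow.
 */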
static int flow_offload_fill_route(struct flow_offload *flow,
                                   const struct nf_flow_route *route,
                                   enum flow_offload_tuple_dir dir)
{
        struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
        struct dst_entry *other_dst = route->tuple[!dir].dst;
        struct dst_entry *dst = route->tuple[dir].dst;

        if (!dst_hold_safe(route->tuple[dir].dst))
                return -1;

        switch (flow_tuple->l3proto) {
        case NFPROTO_IPV4:
                flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
                break;
        case NFPROTO_IPV6:
                flow_tuple->mtu = ip6_dst_mtu_forward(dst);
                break;
        }

        flow_tuple->iifidx = other_dst->dev->ifindex;
        flow_tuple->dst_cache = dst;

        return 0;
}

int flow_offload_route_init(struct flow_offload *flow,
                            const struct nf_flow_route *route)
{
        int err;

        err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
        if (err < 0)
                return err;

        err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
        if (err < 0)
                goto err_route_reply;

        flow->type = NF_FLOW_OFFLOAD_ROUTE;

        return 0;

err_route_reply:
        dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);

        return err;
}
EXPORT_SYMBOL_GPL(flow_offload_route_init);
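/* Helpers to hand a connection back to the normal conntrack path once its
 * flow leaves the flowtable: force the TCP state back to ESTABLISHED with
 * the tracked windows cleared, and cap the remaining conntrack timeout at
 * a short, protocol-specific "pickup" value.
 */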
static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
{
        tcp->state = TCP_CONNTRACK_ESTABLISHED;
        tcp->seen[0].td_maxwin = 0;
        tcp->seen[1].td_maxwin = 0;
}

#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)

static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
{
        const struct nf_conntrack_l4proto *l4proto;
        int l4num = nf_ct_protonum(ct);
        unsigned int timeout;

        l4proto = nf_ct_l4proto_find(l4num);
        if (!l4proto)
                return;

        if (l4num == IPPROTO_TCP)
                timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
        else if (l4num == IPPROTO_UDP)
                timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
        else
                return;

        if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
                ct->timeout = nfct_time_stamp + timeout;
}

static void flow_offload_fixup_ct_state(struct nf_conn *ct)
{
        if (nf_ct_protonum(ct) == IPPROTO_TCP)
                flow_offload_fixup_tcp(&ct->proto.tcp);
}

static void flow_offload_fixup_ct(struct nf_conn *ct)
{
        flow_offload_fixup_ct_state(ct);
        flow_offload_fixup_ct_timeout(ct);
}

static void flow_offload_route_release(struct flow_offload *flow)
{
        dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
        dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
}
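/* Release the cached routes (for route type flows), drop the conntrack
 * reference and free the flow via RCU. A dying flow also removes its
 * conntrack entry.
 */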
void flow_offload_free(struct flow_offload *flow)
{
        switch (flow->type) {
        case NF_FLOW_OFFLOAD_ROUTE:
                flow_offload_route_release(flow);
                break;
        default:
                break;
        }
        if (flow->flags & FLOW_OFFLOAD_DYING)
                nf_ct_delete(flow->ct, 0, 0);
        nf_ct_put(flow->ct);
        kfree_rcu(flow, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_free);
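/* Hash and compare flow tuples over their lookup key only, i.e. everything
 * up to (but not including) the 'dir' member of struct flow_offload_tuple.
 */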
static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
        const struct flow_offload_tuple *tuple = data;

        return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
{
        const struct flow_offload_tuple_rhash *tuplehash = data;

        return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
                                        const void *ptr)
{
        const struct flow_offload_tuple *tuple = arg->key;
        const struct flow_offload_tuple_rhash *x = ptr;

        if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
                return 1;

        return 0;
}

static const struct rhashtable_params nf_flow_offload_rhash_params = {
        .head_offset            = offsetof(struct flow_offload_tuple_rhash, node),
        .hashfn                 = flow_offload_hash,
        .obj_hashfn             = flow_offload_hash_obj,
        .obj_cmpfn              = flow_offload_hash_cmp,
        .automatic_shrinking    = true,
};
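/* Insert both tuple directions into the flowtable and arm the flow timeout.
 * If hardware offload is enabled on the table, also schedule the flow for
 * offload to the driver.
 */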
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
        int err;

        flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT;

        err = rhashtable_insert_fast(&flow_table->rhashtable,
                                     &flow->tuplehash[0].node,
                                     nf_flow_offload_rhash_params);
        if (err < 0)
                return err;

        err = rhashtable_insert_fast(&flow_table->rhashtable,
                                     &flow->tuplehash[1].node,
                                     nf_flow_offload_rhash_params);
        if (err < 0) {
                rhashtable_remove_fast(&flow_table->rhashtable,
                                       &flow->tuplehash[0].node,
                                       nf_flow_offload_rhash_params);
                return err;
        }

        if (flow_table->flags & NF_FLOWTABLE_HW_OFFLOAD)
                nf_flow_offload_add(flow_table, flow);

        return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);

static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
        return nf_flow_timeout_delta(flow->timeout) <= 0;
}
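/* Remove both directions of a flow from the flowtable, clear IPS_OFFLOAD
 * on the conntrack entry, fix up its state and timeout as appropriate and
 * free the flow.
 */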
static void flow_offload_del(struct nf_flowtable *flow_table,
                             struct flow_offload *flow)
{
        rhashtable_remove_fast(&flow_table->rhashtable,
                               &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
                               nf_flow_offload_rhash_params);
        rhashtable_remove_fast(&flow_table->rhashtable,
                               &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
                               nf_flow_offload_rhash_params);

        clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);

        if (nf_flow_has_expired(flow))
                flow_offload_fixup_ct(flow->ct);
        else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
                flow_offload_fixup_ct_timeout(flow->ct);

        flow_offload_free(flow);
}

void flow_offload_teardown(struct flow_offload *flow)
{
        flow->flags |= FLOW_OFFLOAD_TEARDOWN;

        flow_offload_fixup_ct_state(flow->ct);
}
EXPORT_SYMBOL_GPL(flow_offload_teardown);
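/* Look up a tuple in the flowtable. Returns NULL for entries that are
 * already dying or being torn down so that such packets fall back to the
 * classic forwarding path.
 */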
struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
                    struct flow_offload_tuple *tuple)
{
        struct flow_offload_tuple_rhash *tuplehash;
        struct flow_offload *flow;
        int dir;

        tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
                                      nf_flow_offload_rhash_params);
        if (!tuplehash)
                return NULL;

        dir = tuplehash->tuple.dir;
        flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
        if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
                return NULL;

        if (unlikely(nf_ct_is_dying(flow->ct)))
                return NULL;

        return tuplehash;
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);
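/* Walk all flows in the table and invoke @iter once per flow. Only the
 * original direction tuple is visited so each flow is reported a single
 * time; -EAGAIN from the rhashtable walker (table resize) is skipped and
 * the walk continues.
 */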
static int
nf_flow_table_iterate(struct nf_flowtable *flow_table,
                      void (*iter)(struct flow_offload *flow, void *data),
                      void *data)
{
        struct flow_offload_tuple_rhash *tuplehash;
        struct rhashtable_iter hti;
        struct flow_offload *flow;
        int err = 0;

        rhashtable_walk_enter(&flow_table->rhashtable, &hti);
        rhashtable_walk_start(&hti);

        while ((tuplehash = rhashtable_walk_next(&hti))) {
                if (IS_ERR(tuplehash)) {
                        if (PTR_ERR(tuplehash) != -EAGAIN) {
                                err = PTR_ERR(tuplehash);
                                break;
                        }
                        continue;
                }
                if (tuplehash->tuple.dir)
                        continue;

                flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

                iter(flow, data);
        }
        rhashtable_walk_stop(&hti);
        rhashtable_walk_exit(&hti);

        return err;
}
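/* Garbage collection step, run for each flow from the periodic gc work:
 * refresh hardware counters and remove flows that have expired, whose
 * conntrack entry is dying or that have been marked for teardown.
 * Hardware-offloaded flows are first scheduled for removal from hardware
 * and only freed once they are flagged FLOW_OFFLOAD_HW_DEAD.
 */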
static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
{
        struct nf_flowtable *flow_table = data;

        if (flow->flags & FLOW_OFFLOAD_HW)
                nf_flow_offload_stats(flow_table, flow);

        if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) ||
            (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))) {
                if (flow->flags & FLOW_OFFLOAD_HW) {
                        if (!(flow->flags & FLOW_OFFLOAD_HW_DYING))
                                nf_flow_offload_del(flow_table, flow);
                        else if (flow->flags & FLOW_OFFLOAD_HW_DEAD)
                                flow_offload_del(flow_table, flow);
                } else {
                        flow_offload_del(flow_table, flow);
                }
        }
}

static void nf_flow_offload_work_gc(struct work_struct *work)
{
        struct nf_flowtable *flow_table;

        flow_table = container_of(work, struct nf_flowtable, gc_work.work);
        nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
        queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
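/* Checksum helpers for port NAT on the fast path: adjust the TCP/UDP
 * checksum after the caller has rewritten a port. The UDP checksum is only
 * updated when it is actually in use (or for CHECKSUM_PARTIAL packets).
 */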
static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
                                __be16 port, __be16 new_port)
{
        struct tcphdr *tcph;

        if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
            skb_try_make_writable(skb, thoff + sizeof(*tcph)))
                return -1;

        tcph = (void *)(skb_network_header(skb) + thoff);
        inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);

        return 0;
}

static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
                                __be16 port, __be16 new_port)
{
        struct udphdr *udph;

        if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
            skb_try_make_writable(skb, thoff + sizeof(*udph)))
                return -1;

        udph = (void *)(skb_network_header(skb) + thoff);
        if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
                inet_proto_csum_replace2(&udph->check, skb, port,
                                         new_port, true);
                if (!udph->check)
                        udph->check = CSUM_MANGLED_0;
        }

        return 0;
}

static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
                            u8 protocol, __be16 port, __be16 new_port)
{
        switch (protocol) {
        case IPPROTO_TCP:
                if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
                        return NF_DROP;
                break;
        case IPPROTO_UDP:
                if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
                        return NF_DROP;
                break;
        }

        return 0;
}
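/* Apply source/destination port NAT to a packet on the fast path, using
 * the ports recorded in the opposite direction's tuple, then fix up the
 * L4 checksum via nf_flow_nat_port().
 */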
int nf_flow_snat_port(const struct flow_offload *flow,
                      struct sk_buff *skb, unsigned int thoff,
                      u8 protocol, enum flow_offload_tuple_dir dir)
{
        struct flow_ports *hdr;
        __be16 port, new_port;

        if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
            skb_try_make_writable(skb, thoff + sizeof(*hdr)))
                return -1;

        hdr = (void *)(skb_network_header(skb) + thoff);

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                port = hdr->source;
                new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
                hdr->source = new_port;
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                port = hdr->dest;
                new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
                hdr->dest = new_port;
                break;
        default:
                return -1;
        }

        return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_snat_port);

int nf_flow_dnat_port(const struct flow_offload *flow,
                      struct sk_buff *skb, unsigned int thoff,
                      u8 protocol, enum flow_offload_tuple_dir dir)
{
        struct flow_ports *hdr;
        __be16 port, new_port;

        if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
            skb_try_make_writable(skb, thoff + sizeof(*hdr)))
                return -1;

        hdr = (void *)(skb_network_header(skb) + thoff);

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                port = hdr->dest;
                new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
                hdr->dest = new_port;
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                port = hdr->source;
                new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
                hdr->source = new_port;
                break;
        default:
                return -1;
        }

        return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
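/* Initialise a flowtable: set up its rhashtable and flow block, kick off
 * the periodic garbage collection work and add the table to the global
 * flowtables list.
 */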
int nf_flow_table_init(struct nf_flowtable *flowtable)
{
        int err;

        INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
        flow_block_init(&flowtable->flow_block);

        err = rhashtable_init(&flowtable->rhashtable,
                              &nf_flow_offload_rhash_params);
        if (err < 0)
                return err;

        queue_delayed_work(system_power_efficient_wq,
                           &flowtable->gc_work, HZ);

        mutex_lock(&flowtable_lock);
        list_add(&flowtable->list, &flowtables);
        mutex_unlock(&flowtable_lock);

        return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_table_init);
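/* Cleanup callback for nf_flow_table_iterate(): with a NULL device every
 * flow is torn down; otherwise only flows in the device's netns whose
 * input interface matches the device are marked dead.
 * nf_flow_table_cleanup() applies this to every registered flowtable.
 */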
static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
{
        struct net_device *dev = data;

        if (!dev) {
                flow_offload_teardown(flow);
                return;
        }

        if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
            (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
             flow->tuplehash[1].tuple.iifidx == dev->ifindex))
                flow_offload_dead(flow);
}

static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
                                          struct net_device *dev)
{
        nf_flow_table_offload_flush(flowtable);
        nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
        flush_delayed_work(&flowtable->gc_work);
}

void nf_flow_table_cleanup(struct net_device *dev)
{
        struct nf_flowtable *flowtable;

        mutex_lock(&flowtable_lock);
        list_for_each_entry(flowtable, &flowtables, list)
                nf_flow_table_iterate_cleanup(flowtable, dev);
        mutex_unlock(&flowtable_lock);
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
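/* Final teardown of a flowtable: unlink it from the global list, stop the
 * gc work, mark the remaining flows for teardown and run one last gc pass
 * to free them before destroying the rhashtable.
 */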
void nf_flow_table_free(struct nf_flowtable *flow_table)
{
        mutex_lock(&flowtable_lock);
        list_del(&flow_table->list);
        mutex_unlock(&flowtable_lock);
        cancel_delayed_work_sync(&flow_table->gc_work);
        nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
        nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
        rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);

static int __init nf_flow_table_module_init(void)
{
        return nf_flow_table_offload_init();
}

static void __exit nf_flow_table_module_exit(void)
{
        nf_flow_table_offload_exit();
}

module_init(nf_flow_table_module_init);
module_exit(nf_flow_table_module_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");