linux/net/netfilter/nft_flow_offload.c
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_tables.h>
#include <net/ip.h> /* for ipv4 options. */
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_flow_table.h>

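/* Expression private data: the flowtable that offloaded flows are added to. */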
struct nft_flow_offload {
        struct nft_flowtable    *flowtable;
};

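/* Fill in the flow route: reuse the packet's current dst for @dir and look
 * up a route back towards this packet's sender, which becomes the dst for
 * the opposite direction.
 */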
static int nft_flow_route(const struct nft_pktinfo *pkt,
                          const struct nf_conn *ct,
                          struct nf_flow_route *route,
                          enum ip_conntrack_dir dir)
{
        struct dst_entry *this_dst = skb_dst(pkt->skb);
        struct dst_entry *other_dst = NULL;
        struct flowi fl;

        memset(&fl, 0, sizeof(fl));
        switch (nft_pf(pkt)) {
        case NFPROTO_IPV4:
                fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
                fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
                break;
        case NFPROTO_IPV6:
                fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
                fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex;
                break;
        }

        nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
        if (!other_dst)
                return -ENOENT;

        route->tuple[dir].dst           = this_dst;
        route->tuple[!dir].dst          = other_dst;

        return 0;
}

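/* Skip offloading for packets with an IPsec secpath or IPv4 options; the
 * flowtable fast path does not handle them.
 */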
static bool nft_flow_offload_skip(struct sk_buff *skb, int family)
{
        if (skb_sec_path(skb))
                return true;

        if (family == NFPROTO_IPV4) {
                const struct ip_options *opt;

                opt = &(IPCB(skb)->opt);

                if (unlikely(opt->optlen))
                        return true;
        }

        return false;
}

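/* Rule evaluation: for confirmed TCP/UDP conntrack entries that pass the
 * checks below, set IPS_OFFLOAD, build the route and add a flow_offload
 * entry to the flowtable. Any skip or failure sets the NFT_BREAK verdict
 * so the packet keeps traversing the regular ruleset.
 */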
static void nft_flow_offload_eval(const struct nft_expr *expr,
                                  struct nft_regs *regs,
                                  const struct nft_pktinfo *pkt)
{
        struct nft_flow_offload *priv = nft_expr_priv(expr);
        struct nf_flowtable *flowtable = &priv->flowtable->data;
        struct tcphdr _tcph, *tcph = NULL;
        enum ip_conntrack_info ctinfo;
        struct nf_flow_route route;
        struct flow_offload *flow;
        enum ip_conntrack_dir dir;
        struct nf_conn *ct;
        int ret;

        if (nft_flow_offload_skip(pkt->skb, nft_pf(pkt)))
                goto out;

        ct = nf_ct_get(pkt->skb, &ctinfo);
        if (!ct)
                goto out;

        switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
        case IPPROTO_TCP:
                tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff,
                                          sizeof(_tcph), &_tcph);
                if (unlikely(!tcph || tcph->fin || tcph->rst))
                        goto out;
                break;
        case IPPROTO_UDP:
                break;
        default:
                goto out;
        }

        if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
            ct->status & (IPS_SEQ_ADJUST | IPS_NAT_CLASH))
                goto out;

        if (!nf_ct_is_confirmed(ct))
                goto out;

        if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
                goto out;

        dir = CTINFO2DIR(ctinfo);
        if (nft_flow_route(pkt, ct, &route, dir) < 0)
                goto err_flow_route;

        flow = flow_offload_alloc(ct);
        if (!flow)
                goto err_flow_alloc;

        if (flow_offload_route_init(flow, &route) < 0)
                goto err_flow_add;

        if (tcph) {
                ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
                ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
        }

        ret = flow_offload_add(flowtable, flow);
        if (ret < 0)
                goto err_flow_add;

        dst_release(route.tuple[!dir].dst);
        return;

err_flow_add:
        flow_offload_free(flow);
err_flow_alloc:
        dst_release(route.tuple[!dir].dst);
err_flow_route:
        clear_bit(IPS_OFFLOAD_BIT, &ct->status);
out:
        regs->verdict.code = NFT_BREAK;
}

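/* This expression is only valid in chains attached to the forward hook. */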
static int nft_flow_offload_validate(const struct nft_ctx *ctx,
                                     const struct nft_expr *expr,
                                     const struct nft_data **data)
{
        unsigned int hook_mask = (1 << NF_INET_FORWARD);

        return nft_chain_validate_hooks(ctx->chain, hook_mask);
}

static const struct nla_policy nft_flow_offload_policy[NFTA_FLOW_MAX + 1] = {
        [NFTA_FLOW_TABLE_NAME]  = { .type = NLA_STRING,
                                    .len = NFT_NAME_MAXLEN - 1 },
};

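/* Look up the flowtable by name in the next generation, take a reference
 * on it and grab the conntrack netns reference.
 */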
static int nft_flow_offload_init(const struct nft_ctx *ctx,
                                 const struct nft_expr *expr,
                                 const struct nlattr * const tb[])
{
        struct nft_flow_offload *priv = nft_expr_priv(expr);
        u8 genmask = nft_genmask_next(ctx->net);
        struct nft_flowtable *flowtable;

        if (!tb[NFTA_FLOW_TABLE_NAME])
                return -EINVAL;

        flowtable = nft_flowtable_lookup(ctx->table, tb[NFTA_FLOW_TABLE_NAME],
                                         genmask);
        if (IS_ERR(flowtable))
                return PTR_ERR(flowtable);

        priv->flowtable = flowtable;
        flowtable->use++;

        return nf_ct_netns_get(ctx->net, ctx->family);
}

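/* The flowtable use counter is dropped/restored via the deactivate and
 * activate callbacks so that aborted transactions leave it unchanged.
 */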
static void nft_flow_offload_deactivate(const struct nft_ctx *ctx,
                                        const struct nft_expr *expr,
                                        enum nft_trans_phase phase)
{
        struct nft_flow_offload *priv = nft_expr_priv(expr);

        nf_tables_deactivate_flowtable(ctx, priv->flowtable, phase);
}

static void nft_flow_offload_activate(const struct nft_ctx *ctx,
                                      const struct nft_expr *expr)
{
        struct nft_flow_offload *priv = nft_expr_priv(expr);

        priv->flowtable->use++;
}

static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
                                     const struct nft_expr *expr)
{
        nf_ct_netns_put(ctx->net, ctx->family);
}

static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
        struct nft_flow_offload *priv = nft_expr_priv(expr);

        if (nla_put_string(skb, NFTA_FLOW_TABLE_NAME, priv->flowtable->name))
                goto nla_put_failure;

        return 0;

nla_put_failure:
        return -1;
}

static struct nft_expr_type nft_flow_offload_type;
static const struct nft_expr_ops nft_flow_offload_ops = {
        .type           = &nft_flow_offload_type,
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)),
        .eval           = nft_flow_offload_eval,
        .init           = nft_flow_offload_init,
        .activate       = nft_flow_offload_activate,
        .deactivate     = nft_flow_offload_deactivate,
        .destroy        = nft_flow_offload_destroy,
        .validate       = nft_flow_offload_validate,
        .dump           = nft_flow_offload_dump,
};

static struct nft_expr_type nft_flow_offload_type __read_mostly = {
        .name           = "flow_offload",
        .ops            = &nft_flow_offload_ops,
        .policy         = nft_flow_offload_policy,
        .maxattr        = NFTA_FLOW_MAX,
        .owner          = THIS_MODULE,
};

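/* Flush flowtable entries that reference a network device when it goes down. */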
static int flow_offload_netdev_event(struct notifier_block *this,
                                     unsigned long event, void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);

        if (event != NETDEV_DOWN)
                return NOTIFY_DONE;

        nf_flow_table_cleanup(dev);

        return NOTIFY_DONE;
}

static struct notifier_block flow_offload_netdev_notifier = {
        .notifier_call  = flow_offload_netdev_event,
};

static int __init nft_flow_offload_module_init(void)
{
        int err;

        err = register_netdevice_notifier(&flow_offload_netdev_notifier);
        if (err)
                goto err;

        err = nft_register_expr(&nft_flow_offload_type);
        if (err < 0)
                goto register_expr;

        return 0;

register_expr:
        unregister_netdevice_notifier(&flow_offload_netdev_notifier);
err:
        return err;
}

static void __exit nft_flow_offload_module_exit(void)
{
        nft_unregister_expr(&nft_flow_offload_type);
        unregister_netdevice_notifier(&flow_offload_netdev_notifier);
}

module_init(nft_flow_offload_module_init);
module_exit(nft_flow_offload_module_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_EXPR("flow_offload");
MODULE_DESCRIPTION("nftables hardware flow offload module");