linux/net/netfilter/nf_flow_table_offload.c
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <linux/tc_act/tc_csum.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

static struct workqueue_struct *nf_flow_offload_wq;

struct flow_offload_work {
        struct list_head        list;
        enum flow_cls_command   cmd;
        int                     priority;
        struct nf_flowtable     *flowtable;
        struct flow_offload     *flow;
        struct work_struct      work;
};

#define NF_FLOW_DISSECTOR(__match, __type, __field)     \
        (__match)->dissector.offset[__type] =           \
                offsetof(struct nf_flow_key, __field)

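/* Populate the tunnel match from the route's lightweight tunnel state,
 * so offloaded rules also match on the encapsulation headers. Note the
 * address swap: tun_info describes the transmit side, while the rule
 * matches packets on the receive path.
 */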
static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
                                   struct ip_tunnel_info *tun_info)
{
        struct nf_flow_key *mask = &match->mask;
        struct nf_flow_key *key = &match->key;
        unsigned int enc_keys;

        if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
                return;

        NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
        NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
        key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
        mask->enc_key_id.keyid = 0xffffffff;
        enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
                   BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);

        if (ip_tunnel_info_af(tun_info) == AF_INET) {
                NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
                                  enc_ipv4);
                key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
                key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
                if (key->enc_ipv4.src)
                        mask->enc_ipv4.src = 0xffffffff;
                if (key->enc_ipv4.dst)
                        mask->enc_ipv4.dst = 0xffffffff;
                enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
                key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
        } else {
                memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
                       sizeof(struct in6_addr));
                memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
                       sizeof(struct in6_addr));
                if (memcmp(&key->enc_ipv6.src, &in6addr_any,
                           sizeof(struct in6_addr)))
                        memset(&mask->enc_ipv6.src, 0xff,
                               sizeof(struct in6_addr));
                if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
                           sizeof(struct in6_addr)))
                        memset(&mask->enc_ipv6.dst, 0xff,
                               sizeof(struct in6_addr));
                enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
                key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
        }

        match->dissector.used_keys |= enc_keys;
}

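/* Translate a flow tuple into a flow_dissector match: ingress ifindex,
 * L3 addresses, L4 protocol and ports. For TCP, RST and FIN are masked
 * in with a zero key, so packets carrying either flag miss the hardware
 * rule and fall back to the software path for connection teardown.
 */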
static int nf_flow_rule_match(struct nf_flow_match *match,
                              const struct flow_offload_tuple *tuple,
                              struct dst_entry *other_dst)
{
        struct nf_flow_key *mask = &match->mask;
        struct nf_flow_key *key = &match->key;
        struct ip_tunnel_info *tun_info;

        NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
        NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
        NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
        NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
        NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
        NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
        NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);

        if (other_dst && other_dst->lwtstate) {
                tun_info = lwt_tun_info(other_dst->lwtstate);
                nf_flow_rule_lwt_match(match, tun_info);
        }

        key->meta.ingress_ifindex = tuple->iifidx;
        mask->meta.ingress_ifindex = 0xffffffff;

        switch (tuple->l3proto) {
        case AF_INET:
                key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
                key->basic.n_proto = htons(ETH_P_IP);
                key->ipv4.src = tuple->src_v4.s_addr;
                mask->ipv4.src = 0xffffffff;
                key->ipv4.dst = tuple->dst_v4.s_addr;
                mask->ipv4.dst = 0xffffffff;
                break;
        case AF_INET6:
                key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
                key->basic.n_proto = htons(ETH_P_IPV6);
                key->ipv6.src = tuple->src_v6;
                memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
                key->ipv6.dst = tuple->dst_v6;
                memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
                break;
        default:
                return -EOPNOTSUPP;
        }
        mask->control.addr_type = 0xffff;
        match->dissector.used_keys |= BIT(key->control.addr_type);
        mask->basic.n_proto = 0xffff;

        switch (tuple->l4proto) {
        case IPPROTO_TCP:
                key->tcp.flags = 0;
                mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
                match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
                break;
        case IPPROTO_UDP:
                break;
        default:
                return -EOPNOTSUPP;
        }

        key->basic.ip_proto = tuple->l4proto;
        mask->basic.ip_proto = 0xff;

        key->tp.src = tuple->src_port;
        mask->tp.src = 0xffff;
        key->tp.dst = tuple->dst_port;
        mask->tp.dst = 0xffff;

        match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
                                      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
                                      BIT(FLOW_DISSECTOR_KEY_BASIC) |
                                      BIT(FLOW_DISSECTOR_KEY_PORTS);
        return 0;
}

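/* A mangle action rewrites one 32-bit word at @offset within the header
 * selected by @htype; as used below, bits cleared in @mask are replaced
 * by the corresponding bits of @value.
 */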
static void flow_offload_mangle(struct flow_action_entry *entry,
                                enum flow_action_mangle_base htype, u32 offset,
                                const __be32 *value, const __be32 *mask)
{
        entry->id = FLOW_ACTION_MANGLE;
        entry->mangle.htype = htype;
        entry->mangle.offset = offset;
        memcpy(&entry->mangle.mask, mask, sizeof(u32));
        memcpy(&entry->mangle.val, value, sizeof(u32));
}

static inline struct flow_action_entry *
flow_action_entry_next(struct nf_flow_rule *flow_rule)
{
        int i = flow_rule->rule->action.num_entries++;

        return &flow_rule->rule->action.entries[i];
}

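/* Rewrite the ethernet source address to that of the egress device,
 * which is looked up via the reverse tuple's ingress ifindex. The 6-byte
 * address spans two 32-bit mangle words at offsets 4 and 8 of the
 * ethernet header.
 */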
static int flow_offload_eth_src(struct net *net,
                                const struct flow_offload *flow,
                                enum flow_offload_tuple_dir dir,
                                struct nf_flow_rule *flow_rule)
{
        const struct flow_offload_tuple *tuple = &flow->tuplehash[!dir].tuple;
        struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
        struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
        struct net_device *dev;
        u32 mask, val;
        u16 val16;

        dev = dev_get_by_index(net, tuple->iifidx);
        if (!dev)
                return -ENOENT;

        mask = ~0xffff0000;
        memcpy(&val16, dev->dev_addr, 2);
        val = val16 << 16;
        flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
                            &val, &mask);

        mask = ~0xffffffff;
        memcpy(&val, dev->dev_addr + 2, 4);
        flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
                            &val, &mask);
        dev_put(dev);

        return 0;
}

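/* Rewrite the ethernet destination address to the next hop's address
 * from the neighbour cache. If the neighbour entry is not NUD_VALID yet,
 * bail out so the rule is not offloaded with a stale address.
 */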
static int flow_offload_eth_dst(struct net *net,
                                const struct flow_offload *flow,
                                enum flow_offload_tuple_dir dir,
                                struct nf_flow_rule *flow_rule)
{
        struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
        struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
        const void *daddr = &flow->tuplehash[!dir].tuple.src_v4;
        const struct dst_entry *dst_cache;
        unsigned char ha[ETH_ALEN];
        struct neighbour *n;
        u32 mask, val;
        u8 nud_state;
        u16 val16;

        dst_cache = flow->tuplehash[dir].tuple.dst_cache;
        n = dst_neigh_lookup(dst_cache, daddr);
        if (!n)
                return -ENOENT;

        read_lock_bh(&n->lock);
        nud_state = n->nud_state;
        ether_addr_copy(ha, n->ha);
        read_unlock_bh(&n->lock);

        if (!(nud_state & NUD_VALID)) {
                neigh_release(n);
                return -ENOENT;
        }

        mask = ~0xffffffff;
        memcpy(&val, ha, 4);
        flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
                            &val, &mask);

        mask = ~0x0000ffff;
        memcpy(&val16, ha + 4, 2);
        val = val16;
        flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
                            &val, &mask);
        neigh_release(n);

        return 0;
}

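/* Address NAT: in the original direction, SNAT rewrites the source
 * address to the reply tuple's destination; in the reply direction it
 * restores the destination address. DNAT below is the mirror image.
 */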
static void flow_offload_ipv4_snat(struct net *net,
                                   const struct flow_offload *flow,
                                   enum flow_offload_tuple_dir dir,
                                   struct nf_flow_rule *flow_rule)
{
        struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
        u32 mask = ~htonl(0xffffffff);
        __be32 addr;
        u32 offset;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
                offset = offsetof(struct iphdr, saddr);
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
                offset = offsetof(struct iphdr, daddr);
                break;
        default:
                return;
        }

        flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
                            &addr, &mask);
}

static void flow_offload_ipv4_dnat(struct net *net,
                                   const struct flow_offload *flow,
                                   enum flow_offload_tuple_dir dir,
                                   struct nf_flow_rule *flow_rule)
{
        struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
        u32 mask = ~htonl(0xffffffff);
        __be32 addr;
        u32 offset;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
                offset = offsetof(struct iphdr, daddr);
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
                offset = offsetof(struct iphdr, saddr);
                break;
        default:
                return;
        }

        flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
                            &addr, &mask);
}

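/* An IPv6 address does not fit into a single mangle action: emit four
 * 32-bit mangles, one per word of the address.
 */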
static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
                                     unsigned int offset,
                                     const __be32 *addr, const __be32 *mask)
{
        struct flow_action_entry *entry;
        int i, j;

        /* Walk the 16-byte address one 32-bit word at a time: i is the
         * byte offset into the header, j the word index into addr.
         */
        for (i = 0, j = 0; i < sizeof(struct in6_addr); i += sizeof(u32), j++) {
                entry = flow_action_entry_next(flow_rule);
                flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
                                    offset + i, &addr[j], mask);
        }
}

static void flow_offload_ipv6_snat(struct net *net,
                                   const struct flow_offload *flow,
                                   enum flow_offload_tuple_dir dir,
                                   struct nf_flow_rule *flow_rule)
{
        u32 mask = ~htonl(0xffffffff);
        const __be32 *addr;
        u32 offset;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
                offset = offsetof(struct ipv6hdr, saddr);
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
                offset = offsetof(struct ipv6hdr, daddr);
                break;
        default:
                return;
        }

        flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static void flow_offload_ipv6_dnat(struct net *net,
                                   const struct flow_offload *flow,
                                   enum flow_offload_tuple_dir dir,
                                   struct nf_flow_rule *flow_rule)
{
        u32 mask = ~htonl(0xffffffff);
        const __be32 *addr;
        u32 offset;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
                offset = offsetof(struct ipv6hdr, daddr);
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
                offset = offsetof(struct ipv6hdr, saddr);
                break;
        default:
                return;
        }

        flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static int flow_offload_l4proto(const struct flow_offload *flow)
{
        u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
        u8 type = 0;

        switch (protonum) {
        case IPPROTO_TCP:
                type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
                break;
        case IPPROTO_UDP:
                type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
                break;
        default:
                break;
        }

        return type;
}

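/* Port NAT goes through the first 32-bit word of the TCP/UDP header:
 * in network byte order, the source port occupies the upper 16 bits of
 * that word and the destination port the lower 16 bits.
 */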
static void flow_offload_port_snat(struct net *net,
                                   const struct flow_offload *flow,
                                   enum flow_offload_tuple_dir dir,
                                   struct nf_flow_rule *flow_rule)
{
        struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
        u32 mask, port;
        u32 offset;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
                offset = 0; /* offsetof(struct tcphdr, source); */
                port = htonl(port << 16);
                mask = ~htonl(0xffff0000);
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
                offset = 0; /* offsetof(struct tcphdr, dest); */
                port = htonl(port);
                mask = ~htonl(0xffff);
                break;
        default:
                return;
        }

        flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
                            &port, &mask);
}

static void flow_offload_port_dnat(struct net *net,
                                   const struct flow_offload *flow,
                                   enum flow_offload_tuple_dir dir,
                                   struct nf_flow_rule *flow_rule)
{
        struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
        u32 mask, port;
        u32 offset;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
                offset = 0; /* offsetof(struct tcphdr, dest); */
                port = htonl(port);
                mask = ~htonl(0xffff);
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
                offset = 0; /* offsetof(struct tcphdr, source); */
                port = htonl(port << 16);
                mask = ~htonl(0xffff0000);
                break;
        default:
                return;
        }

        flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
                            &port, &mask);
}

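/* After address or port rewrites, the IPv4 header checksum and, for TCP
 * and UDP, the transport checksum have to be recomputed.
 */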
static void flow_offload_ipv4_checksum(struct net *net,
                                       const struct flow_offload *flow,
                                       struct nf_flow_rule *flow_rule)
{
        u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
        struct flow_action_entry *entry = flow_action_entry_next(flow_rule);

        entry->id = FLOW_ACTION_CSUM;
        entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;

        switch (protonum) {
        case IPPROTO_TCP:
                entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
                break;
        case IPPROTO_UDP:
                entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
                break;
        }
}

static void flow_offload_redirect(const struct flow_offload *flow,
                                  enum flow_offload_tuple_dir dir,
                                  struct nf_flow_rule *flow_rule)
{
        struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
        struct rtable *rt;

        rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
        entry->id = FLOW_ACTION_REDIRECT;
        entry->dev = rt->dst.dev;
        dev_hold(rt->dst.dev);
}

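/* Tunnel actions: encapsulate when this direction's route carries
 * transmit tunnel metadata; decapsulate when the reverse direction's
 * route does, since packets then arrive encapsulated.
 */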
static void flow_offload_encap_tunnel(const struct flow_offload *flow,
                                      enum flow_offload_tuple_dir dir,
                                      struct nf_flow_rule *flow_rule)
{
        struct flow_action_entry *entry;
        struct dst_entry *dst;

        dst = flow->tuplehash[dir].tuple.dst_cache;
        if (dst && dst->lwtstate) {
                struct ip_tunnel_info *tun_info;

                tun_info = lwt_tun_info(dst->lwtstate);
                if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
                        entry = flow_action_entry_next(flow_rule);
                        entry->id = FLOW_ACTION_TUNNEL_ENCAP;
                        entry->tunnel = tun_info;
                }
        }
}

static void flow_offload_decap_tunnel(const struct flow_offload *flow,
                                      enum flow_offload_tuple_dir dir,
                                      struct nf_flow_rule *flow_rule)
{
        struct flow_action_entry *entry;
        struct dst_entry *dst;

        dst = flow->tuplehash[!dir].tuple.dst_cache;
        if (dst && dst->lwtstate) {
                struct ip_tunnel_info *tun_info;

                tun_info = lwt_tun_info(dst->lwtstate);
                if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
                        entry = flow_action_entry_next(flow_rule);
                        entry->id = FLOW_ACTION_TUNNEL_DECAP;
                }
        }
}

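/* Build the complete action list for one direction. Ordering matters:
 * tunnel decap first, then ethernet and NAT rewrites, a checksum fixup
 * if any NAT took place, and finally the redirect to the egress device.
 */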
int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
                            enum flow_offload_tuple_dir dir,
                            struct nf_flow_rule *flow_rule)
{
        flow_offload_decap_tunnel(flow, dir, flow_rule);
        flow_offload_encap_tunnel(flow, dir, flow_rule);

        if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
            flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
                return -1;

        if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
                flow_offload_ipv4_snat(net, flow, dir, flow_rule);
                flow_offload_port_snat(net, flow, dir, flow_rule);
        }
        if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
                flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
                flow_offload_port_dnat(net, flow, dir, flow_rule);
        }
        if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
            test_bit(NF_FLOW_DNAT, &flow->flags))
                flow_offload_ipv4_checksum(net, flow, flow_rule);

        flow_offload_redirect(flow, dir, flow_rule);

        return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);

int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
                            enum flow_offload_tuple_dir dir,
                            struct nf_flow_rule *flow_rule)
{
        flow_offload_decap_tunnel(flow, dir, flow_rule);
        flow_offload_encap_tunnel(flow, dir, flow_rule);

        if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
            flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
                return -1;

        if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
                flow_offload_ipv6_snat(net, flow, dir, flow_rule);
                flow_offload_port_snat(net, flow, dir, flow_rule);
        }
        if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
                flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
                flow_offload_port_dnat(net, flow, dir, flow_rule);
        }

        flow_offload_redirect(flow, dir, flow_rule);

        return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);

#define NF_FLOW_RULE_ACTION_MAX 16

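/* Allocate the match and action list for one direction of a flow. The
 * actions are filled in by the flowtable type's ->action callback, e.g.
 * nf_flow_rule_route_ipv4() above.
 */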
static struct nf_flow_rule *
nf_flow_offload_rule_alloc(struct net *net,
                           const struct flow_offload_work *offload,
                           enum flow_offload_tuple_dir dir)
{
        const struct nf_flowtable *flowtable = offload->flowtable;
        const struct flow_offload *flow = offload->flow;
        const struct flow_offload_tuple *tuple;
        struct nf_flow_rule *flow_rule;
        struct dst_entry *other_dst;
        int err = -ENOMEM;

        flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
        if (!flow_rule)
                goto err_flow;

        flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
        if (!flow_rule->rule)
                goto err_flow_rule;

        flow_rule->rule->match.dissector = &flow_rule->match.dissector;
        flow_rule->rule->match.mask = &flow_rule->match.mask;
        flow_rule->rule->match.key = &flow_rule->match.key;

        tuple = &flow->tuplehash[dir].tuple;
        other_dst = flow->tuplehash[!dir].tuple.dst_cache;
        err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
        if (err < 0)
                goto err_flow_match;

        flow_rule->rule->action.num_entries = 0;
        if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
                goto err_flow_match;

        return flow_rule;

err_flow_match:
        kfree(flow_rule->rule);
err_flow_rule:
        kfree(flow_rule);
err_flow:
        return NULL;
}

static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
{
        struct flow_action_entry *entry;
        int i;

        for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
                entry = &flow_rule->rule->action.entries[i];
                if (entry->id != FLOW_ACTION_REDIRECT)
                        continue;

                dev_put(entry->dev);
        }
        kfree(flow_rule->rule);
        kfree(flow_rule);
}

static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
{
        int i;

        for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
                __nf_flow_offload_destroy(flow_rule[i]);
}

static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
                                 struct nf_flow_rule *flow_rule[])
{
        struct net *net = read_pnet(&offload->flowtable->net);

        flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
                                                  FLOW_OFFLOAD_DIR_ORIGINAL);
        if (!flow_rule[0])
                return -ENOMEM;

        flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
                                                  FLOW_OFFLOAD_DIR_REPLY);
        if (!flow_rule[1]) {
                __nf_flow_offload_destroy(flow_rule[0]);
                return -ENOMEM;
        }

        return 0;
}

static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
                                 __be16 proto, int priority,
                                 enum flow_cls_command cmd,
                                 const struct flow_offload_tuple *tuple,
                                 struct netlink_ext_ack *extack)
{
        cls_flow->common.protocol = proto;
        cls_flow->common.prio = priority;
        cls_flow->common.extack = extack;
        cls_flow->command = cmd;
        cls_flow->cookie = (unsigned long)tuple;
}

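/* Run one command for one direction of the flow through all block
 * callbacks registered on the flowtable. Returns the number of callbacks
 * that accepted it; for FLOW_CLS_STATS, the driver-filled counters are
 * copied back into @stats.
 */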
static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
                                 struct flow_offload *flow,
                                 struct nf_flow_rule *flow_rule,
                                 enum flow_offload_tuple_dir dir,
                                 int priority, int cmd,
                                 struct flow_stats *stats,
                                 struct list_head *block_cb_list)
{
        struct flow_cls_offload cls_flow = {};
        struct flow_block_cb *block_cb;
        struct netlink_ext_ack extack;
        __be16 proto = ETH_P_ALL;
        int err, i = 0;

        nf_flow_offload_init(&cls_flow, proto, priority, cmd,
                             &flow->tuplehash[dir].tuple, &extack);
        if (cmd == FLOW_CLS_REPLACE)
                cls_flow.rule = flow_rule->rule;

        down_read(&flowtable->flow_block_lock);
        list_for_each_entry(block_cb, block_cb_list, list) {
                err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
                                   block_cb->cb_priv);
                if (err < 0)
                        continue;

                i++;
        }
        up_read(&flowtable->flow_block_lock);

        if (cmd == FLOW_CLS_STATS)
                memcpy(stats, &cls_flow.stats, sizeof(*stats));

        return i;
}

static int flow_offload_tuple_add(struct flow_offload_work *offload,
                                  struct nf_flow_rule *flow_rule,
                                  enum flow_offload_tuple_dir dir)
{
        return nf_flow_offload_tuple(offload->flowtable, offload->flow,
                                     flow_rule, dir, offload->priority,
                                     FLOW_CLS_REPLACE, NULL,
                                     &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_tuple_del(struct flow_offload_work *offload,
                                   enum flow_offload_tuple_dir dir)
{
        nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
                              offload->priority, FLOW_CLS_DESTROY, NULL,
                              &offload->flowtable->flow_block.cb_list);
}

static int flow_offload_rule_add(struct flow_offload_work *offload,
                                 struct nf_flow_rule *flow_rule[])
{
        int ok_count = 0;

        ok_count += flow_offload_tuple_add(offload, flow_rule[0],
                                           FLOW_OFFLOAD_DIR_ORIGINAL);
        ok_count += flow_offload_tuple_add(offload, flow_rule[1],
                                           FLOW_OFFLOAD_DIR_REPLY);
        if (ok_count == 0)
                return -ENOENT;

        return 0;
}

static void flow_offload_work_add(struct flow_offload_work *offload)
{
        struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
        int err;

        err = nf_flow_offload_alloc(offload, flow_rule);
        if (err < 0)
                return;

        err = flow_offload_rule_add(offload, flow_rule);
        if (err < 0)
                set_bit(NF_FLOW_HW_REFRESH, &offload->flow->flags);
        else
                set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);

        nf_flow_offload_destroy(flow_rule);
}

static void flow_offload_work_del(struct flow_offload_work *offload)
{
        clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
        flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
        flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
        set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
}

static void flow_offload_tuple_stats(struct flow_offload_work *offload,
                                     enum flow_offload_tuple_dir dir,
                                     struct flow_stats *stats)
{
        nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
                              offload->priority, FLOW_CLS_STATS, stats,
                              &offload->flowtable->flow_block.cb_list);
}

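/* Fetch hardware counters for both directions, push the flow timeout
 * forward based on the most recent use, and fold the packet/byte counts
 * into conntrack accounting if the flowtable has counters enabled.
 */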
static void flow_offload_work_stats(struct flow_offload_work *offload)
{
        struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
        u64 lastused;

        flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
        flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);

        lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
        offload->flow->timeout = max_t(u64, offload->flow->timeout,
                                       lastused + NF_FLOW_TIMEOUT);

        if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
                if (stats[0].pkts)
                        nf_ct_acct_add(offload->flow->ct,
                                       FLOW_OFFLOAD_DIR_ORIGINAL,
                                       stats[0].pkts, stats[0].bytes);
                if (stats[1].pkts)
                        nf_ct_acct_add(offload->flow->ct,
                                       FLOW_OFFLOAD_DIR_REPLY,
                                       stats[1].pkts, stats[1].bytes);
        }
}

static void flow_offload_work_handler(struct work_struct *work)
{
        struct flow_offload_work *offload;

        offload = container_of(work, struct flow_offload_work, work);
        switch (offload->cmd) {
        case FLOW_CLS_REPLACE:
                flow_offload_work_add(offload);
                break;
        case FLOW_CLS_DESTROY:
                flow_offload_work_del(offload);
                break;
        case FLOW_CLS_STATS:
                flow_offload_work_stats(offload);
                break;
        default:
                WARN_ON_ONCE(1);
        }

        clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
        kfree(offload);
}

static void flow_offload_queue_work(struct flow_offload_work *offload)
{
        queue_work(nf_flow_offload_wq, &offload->work);
}

static struct flow_offload_work *
nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
                           struct flow_offload *flow, unsigned int cmd)
{
        struct flow_offload_work *offload;

        if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
                return NULL;

        offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
        if (!offload) {
                clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
                return NULL;
        }

        offload->cmd = cmd;
        offload->flow = flow;
        offload->priority = flowtable->priority;
        offload->flowtable = flowtable;
        INIT_WORK(&offload->work, flow_offload_work_handler);

        return offload;
}

void nf_flow_offload_add(struct nf_flowtable *flowtable,
                         struct flow_offload *flow)
{
        struct flow_offload_work *offload;

        offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
        if (!offload)
                return;

        flow_offload_queue_work(offload);
}

void nf_flow_offload_del(struct nf_flowtable *flowtable,
                         struct flow_offload *flow)
{
        struct flow_offload_work *offload;

        offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
        if (!offload)
                return;

        set_bit(NF_FLOW_HW_DYING, &flow->flags);
        flow_offload_queue_work(offload);
}

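/* Only query the hardware for counters once less than 90% of the flow
 * timeout remains; a recently refreshed flow does not need it.
 */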
void nf_flow_offload_stats(struct nf_flowtable *flowtable,
                           struct flow_offload *flow)
{
        struct flow_offload_work *offload;
        __s32 delta;

        delta = nf_flow_timeout_delta(flow->timeout);
        if (delta >= (9 * NF_FLOW_TIMEOUT) / 10)
                return;

        offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
        if (!offload)
                return;

        flow_offload_queue_work(offload);
}

void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
        if (nf_flowtable_hw_offload(flowtable))
                flush_workqueue(nf_flow_offload_wq);
}

static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
                                     struct flow_block_offload *bo,
                                     enum flow_block_command cmd)
{
        struct flow_block_cb *block_cb, *next;
        int err = 0;

        switch (cmd) {
        case FLOW_BLOCK_BIND:
                list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
                break;
        case FLOW_BLOCK_UNBIND:
                list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
                        list_del(&block_cb->list);
                        flow_block_cb_free(block_cb);
                }
                break;
        default:
                WARN_ON_ONCE(1);
                err = -EOPNOTSUPP;
        }

        return err;
}

static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
                                             struct net *net,
                                             enum flow_block_command cmd,
                                             struct nf_flowtable *flowtable,
                                             struct netlink_ext_ack *extack)
{
        memset(bo, 0, sizeof(*bo));
        bo->net         = net;
        bo->block       = &flowtable->flow_block;
        bo->command     = cmd;
        bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
        bo->extack      = extack;
        INIT_LIST_HEAD(&bo->cb_list);
}

static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb)
{
        struct nf_flowtable *flowtable = block_cb->indr.data;
        struct net_device *dev = block_cb->indr.dev;

        nf_flow_table_gc_cleanup(flowtable, dev);
        down_write(&flowtable->flow_block_lock);
        list_del(&block_cb->list);
        list_del(&block_cb->driver_list);
        flow_block_cb_free(block_cb);
        up_write(&flowtable->flow_block_lock);
}

static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
                                          struct nf_flowtable *flowtable,
                                          struct net_device *dev,
                                          enum flow_block_command cmd,
                                          struct netlink_ext_ack *extack)
{
        nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
                                         extack);

        return flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_FT, flowtable, bo,
                                           nf_flow_table_indr_cleanup);
}

static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
                                     struct nf_flowtable *flowtable,
                                     struct net_device *dev,
                                     enum flow_block_command cmd,
                                     struct netlink_ext_ack *extack)
{
        int err;

        nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
                                         extack);
        err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
        if (err < 0)
                return err;

        return 0;
}

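/* Bind or unbind a device on the flowtable's offload block: use the
 * device's own ndo_setup_tc() if it provides one, otherwise go through
 * the indirect block infrastructure (e.g. for tunnel devices).
 */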
int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
                                struct net_device *dev,
                                enum flow_block_command cmd)
{
        struct netlink_ext_ack extack = {};
        struct flow_block_offload bo;
        int err;

        if (!nf_flowtable_hw_offload(flowtable))
                return 0;

        if (dev->netdev_ops->ndo_setup_tc)
                err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
                                                &extack);
        else
                err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
                                                     &extack);
        if (err < 0)
                return err;

        return nf_flow_table_block_setup(flowtable, &bo, cmd);
}
EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);

int nf_flow_table_offload_init(void)
{
        nf_flow_offload_wq = alloc_workqueue("nf_flow_table_offload",
                                             WQ_UNBOUND, 0);
        if (!nf_flow_offload_wq)
                return -ENOMEM;

        return 0;
}

void nf_flow_table_offload_exit(void)
{
        destroy_workqueue(nf_flow_offload_wq);
}