linux/net/netfilter/nf_flow_table_ip.c
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>

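/* A TCP FIN or RST signals connection teardown: take the flow out of the
 * fast path so follow-up packets go back through the classic path.
 */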
static int nf_flow_state_check(struct flow_offload *flow, int proto,
                               struct sk_buff *skb, unsigned int thoff)
{
        struct tcphdr *tcph;

        if (proto != IPPROTO_TCP)
                return 0;

        if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
                return -1;

        tcph = (void *)(skb_network_header(skb) + thoff);
        if (unlikely(tcph->fin || tcph->rst)) {
                flow_offload_teardown(flow);
                return -1;
        }

        return 0;
}

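/* Update the TCP checksum for a rewritten IPv4 address; the address
 * itself is replaced by the caller.
 */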
static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
                              __be32 addr, __be32 new_addr)
{
        struct tcphdr *tcph;

        if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
            skb_try_make_writable(skb, thoff + sizeof(*tcph)))
                return -1;

        tcph = (void *)(skb_network_header(skb) + thoff);
        inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);

        return 0;
}

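/* As above, for UDP. A zero UDP checksum means "no checksum" on IPv4, so
 * it is only updated when already in use (or still to be computed via
 * checksum offload), folding a zero result to CSUM_MANGLED_0.
 */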
static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
                              __be32 addr, __be32 new_addr)
{
        struct udphdr *udph;

        if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
            skb_try_make_writable(skb, thoff + sizeof(*udph)))
                return -1;

        udph = (void *)(skb_network_header(skb) + thoff);
        if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
                inet_proto_csum_replace4(&udph->check, skb, addr,
                                         new_addr, true);
                if (!udph->check)
                        udph->check = CSUM_MANGLED_0;
        }

        return 0;
}

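/* Dispatch the transport checksum fixup on the IP protocol. */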
static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
                                  unsigned int thoff, __be32 addr,
                                  __be32 new_addr)
{
        switch (iph->protocol) {
        case IPPROTO_TCP:
                if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
                        return NF_DROP;
                break;
        case IPPROTO_UDP:
                if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
                        return NF_DROP;
                break;
        }

        return 0;
}

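/* Source NAT: in the original direction, rewrite the source address to
 * the reply tuple's destination; in the reply direction, rewrite the
 * destination address to the original tuple's source. Both the IP header
 * checksum and the transport checksum must be fixed up.
 */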
static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
                           struct iphdr *iph, unsigned int thoff,
                           enum flow_offload_tuple_dir dir)
{
        __be32 addr, new_addr;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                addr = iph->saddr;
                new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
                iph->saddr = new_addr;
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                addr = iph->daddr;
                new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
                iph->daddr = new_addr;
                break;
        default:
                return -1;
        }
        csum_replace4(&iph->check, addr, new_addr);

        return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

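/* Destination NAT: the mirror image of nf_flow_snat_ip(). */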
static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
                           struct iphdr *iph, unsigned int thoff,
                           enum flow_offload_tuple_dir dir)
{
        __be32 addr, new_addr;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                addr = iph->daddr;
                new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
                iph->daddr = new_addr;
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                addr = iph->saddr;
                new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
                iph->saddr = new_addr;
                break;
        default:
                return -1;
        }
        csum_replace4(&iph->check, addr, new_addr);

        return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

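/* Apply port and address rewrites for whichever of SNAT and DNAT is
 * flagged on the flow.
 */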
static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
                          unsigned int thoff, enum flow_offload_tuple_dir dir)
{
        struct iphdr *iph = ip_hdr(skb);

        if (flow->flags & FLOW_OFFLOAD_SNAT &&
            (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
             nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
                return -1;
        if (flow->flags & FLOW_OFFLOAD_DNAT &&
            (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
             nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
                return -1;

        return 0;
}

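/* An IPv4 header longer than the minimal 20 bytes carries options. */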
static bool ip_has_options(unsigned int thoff)
{
        return thoff != sizeof(struct iphdr);
}

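/* Extract the lookup tuple from the packet. Fragments, packets carrying
 * IP options, non-TCP/UDP traffic and packets about to exceed their TTL
 * stay on the slow path.
 */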
static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
                            struct flow_offload_tuple *tuple)
{
        struct flow_ports *ports;
        unsigned int thoff;
        struct iphdr *iph;

        if (!pskb_may_pull(skb, sizeof(*iph)))
                return -1;

        iph = ip_hdr(skb);
        thoff = iph->ihl * 4;

        if (ip_is_fragment(iph) ||
            unlikely(ip_has_options(thoff)))
                return -1;

        if (iph->protocol != IPPROTO_TCP &&
            iph->protocol != IPPROTO_UDP)
                return -1;

        if (iph->ttl <= 1)
                return -1;

        if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
                return -1;

        /* pskb_may_pull() may have reallocated the header; reload. */
        iph = ip_hdr(skb);
        ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

        tuple->src_v4.s_addr    = iph->saddr;
        tuple->dst_v4.s_addr    = iph->daddr;
        tuple->src_port         = ports->source;
        tuple->dst_port         = ports->dest;
        tuple->l3proto          = AF_INET;
        tuple->l4proto          = iph->protocol;
        tuple->iifidx           = dev->ifindex;

        return 0;
}

/* Based on ip_exceeds_mtu(). */
static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
        if (skb->len <= mtu)
                return false;

        if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
                return false;

        return true;
}

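/* Cached xfrm dsts can be invalidated, e.g. by a policy update, so they
 * are revalidated with dst_check() before use.
 */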
static int nf_flow_offload_dst_check(struct dst_entry *dst)
{
        if (unlikely(dst_xfrm(dst)))
                return dst_check(dst, 0) ? 0 : -1;

        return 0;
}

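/* Hand the packet over to dst_output() so that the xfrm stack applies
 * the required transformations; the skb is consumed.
 */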
static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
                                      const struct nf_hook_state *state,
                                      struct dst_entry *dst)
{
        skb_orphan(skb);
        skb_dst_set_noref(skb, dst);
        dst_output(state->net, state->sk, skb);
        return NF_STOLEN;
}

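/* IPv4 fast path: on a flow table hit, apply NAT, decrement the TTL and
 * transmit directly to the next hop, bypassing the rest of the IP stack.
 */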
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
                        const struct nf_hook_state *state)
{
        struct flow_offload_tuple_rhash *tuplehash;
        struct nf_flowtable *flow_table = priv;
        struct flow_offload_tuple tuple = {};
        enum flow_offload_tuple_dir dir;
        struct flow_offload *flow;
        struct net_device *outdev;
        struct rtable *rt;
        unsigned int thoff;
        struct iphdr *iph;
        __be32 nexthop;

        if (skb->protocol != htons(ETH_P_IP))
                return NF_ACCEPT;

        if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
                return NF_ACCEPT;

        tuplehash = flow_offload_lookup(flow_table, &tuple);
        if (tuplehash == NULL)
                return NF_ACCEPT;

        dir = tuplehash->tuple.dir;
        flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
        rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
        outdev = rt->dst.dev;

        if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
                return NF_ACCEPT;

        if (skb_try_make_writable(skb, sizeof(*iph)))
                return NF_DROP;

        thoff = ip_hdr(skb)->ihl * 4;
        if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
                return NF_ACCEPT;

        if (nf_flow_offload_dst_check(&rt->dst)) {
                flow_offload_teardown(flow);
                return NF_ACCEPT;
        }

        if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
                return NF_DROP;

        flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
        iph = ip_hdr(skb);
        ip_decrease_ttl(iph);
        skb->tstamp = 0;

        if (unlikely(dst_xfrm(&rt->dst))) {
                memset(skb->cb, 0, sizeof(struct inet_skb_parm));
                IPCB(skb)->iif = skb->dev->ifindex;
                IPCB(skb)->flags = IPSKB_FORWARDED;
                return nf_flow_xmit_xfrm(skb, state, &rt->dst);
        }

        skb->dev = outdev;
        nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
        skb_dst_set_noref(skb, &rt->dst);
        neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);

        return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);

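/* IPv6 counterpart of nf_flow_nat_ip_tcp(). */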
static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
                                struct in6_addr *addr,
                                struct in6_addr *new_addr)
{
        struct tcphdr *tcph;

        if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
            skb_try_make_writable(skb, thoff + sizeof(*tcph)))
                return -1;

        tcph = (void *)(skb_network_header(skb) + thoff);
        inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
                                  new_addr->s6_addr32, true);

        return 0;
}

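/* IPv6 counterpart of nf_flow_nat_ip_udp(). */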
static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
                                struct in6_addr *addr,
                                struct in6_addr *new_addr)
{
        struct udphdr *udph;

        if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
            skb_try_make_writable(skb, thoff + sizeof(*udph)))
                return -1;

        udph = (void *)(skb_network_header(skb) + thoff);
        if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
                inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
                                          new_addr->s6_addr32, true);
                if (!udph->check)
                        udph->check = CSUM_MANGLED_0;
        }

        return 0;
}

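/* Dispatch the transport checksum fixup on the IPv6 next header. */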
static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
                                    unsigned int thoff, struct in6_addr *addr,
                                    struct in6_addr *new_addr)
{
        switch (ip6h->nexthdr) {
        case IPPROTO_TCP:
                if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
                        return NF_DROP;
                break;
        case IPPROTO_UDP:
                if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
                        return NF_DROP;
                break;
        }

        return 0;
}

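/* Source NAT for IPv6. Unlike IPv4 there is no IP header checksum to
 * update; only the transport checksum needs fixing.
 */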
static int nf_flow_snat_ipv6(const struct flow_offload *flow,
                             struct sk_buff *skb, struct ipv6hdr *ip6h,
                             unsigned int thoff,
                             enum flow_offload_tuple_dir dir)
{
        struct in6_addr addr, new_addr;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                addr = ip6h->saddr;
                new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
                ip6h->saddr = new_addr;
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                addr = ip6h->daddr;
                new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
                ip6h->daddr = new_addr;
                break;
        default:
                return -1;
        }

        return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

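/* Destination NAT for IPv6: the mirror image of nf_flow_snat_ipv6(). */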
static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
                             struct sk_buff *skb, struct ipv6hdr *ip6h,
                             unsigned int thoff,
                             enum flow_offload_tuple_dir dir)
{
        struct in6_addr addr, new_addr;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                addr = ip6h->daddr;
                new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
                ip6h->daddr = new_addr;
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                addr = ip6h->saddr;
                new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
                ip6h->saddr = new_addr;
                break;
        default:
                return -1;
        }

        return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

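/* IPv6 counterpart of nf_flow_nat_ip(). */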
static int nf_flow_nat_ipv6(const struct flow_offload *flow,
                            struct sk_buff *skb,
                            enum flow_offload_tuple_dir dir)
{
        struct ipv6hdr *ip6h = ipv6_hdr(skb);
        unsigned int thoff = sizeof(*ip6h);

        if (flow->flags & FLOW_OFFLOAD_SNAT &&
            (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
             nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
                return -1;
        if (flow->flags & FLOW_OFFLOAD_DNAT &&
            (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
             nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
                return -1;

        return 0;
}

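/* Extract the IPv6 lookup tuple. Extension headers are not parsed, so
 * only packets whose next header is directly TCP or UDP qualify for the
 * fast path.
 */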
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
                              struct flow_offload_tuple *tuple)
{
        struct flow_ports *ports;
        struct ipv6hdr *ip6h;
        unsigned int thoff;

        if (!pskb_may_pull(skb, sizeof(*ip6h)))
                return -1;

        ip6h = ipv6_hdr(skb);

        if (ip6h->nexthdr != IPPROTO_TCP &&
            ip6h->nexthdr != IPPROTO_UDP)
                return -1;

        if (ip6h->hop_limit <= 1)
                return -1;

        thoff = sizeof(*ip6h);
        if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
                return -1;

        /* pskb_may_pull() may have reallocated the header; reload. */
        ip6h = ipv6_hdr(skb);
        ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

        tuple->src_v6           = ip6h->saddr;
        tuple->dst_v6           = ip6h->daddr;
        tuple->src_port         = ports->source;
        tuple->dst_port         = ports->dest;
        tuple->l3proto          = AF_INET6;
        tuple->l4proto          = ip6h->nexthdr;
        tuple->iifidx           = dev->ifindex;

        return 0;
}

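/* IPv6 fast path: same structure as nf_flow_offload_ip_hook(), with the
 * hop limit in place of the TTL and neighbour discovery for transmission.
 */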
unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
                          const struct nf_hook_state *state)
{
        struct flow_offload_tuple_rhash *tuplehash;
        struct nf_flowtable *flow_table = priv;
        struct flow_offload_tuple tuple = {};
        enum flow_offload_tuple_dir dir;
        const struct in6_addr *nexthop;
        struct flow_offload *flow;
        struct net_device *outdev;
        struct ipv6hdr *ip6h;
        struct rt6_info *rt;

        if (skb->protocol != htons(ETH_P_IPV6))
                return NF_ACCEPT;

        if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
                return NF_ACCEPT;

        tuplehash = flow_offload_lookup(flow_table, &tuple);
        if (tuplehash == NULL)
                return NF_ACCEPT;

        dir = tuplehash->tuple.dir;
        flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
        rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
        outdev = rt->dst.dev;

        if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
                return NF_ACCEPT;

        if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
                                sizeof(*ip6h)))
                return NF_ACCEPT;

        if (nf_flow_offload_dst_check(&rt->dst)) {
                flow_offload_teardown(flow);
                return NF_ACCEPT;
        }

        if (skb_try_make_writable(skb, sizeof(*ip6h)))
                return NF_DROP;

        if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
                return NF_DROP;

        flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
        ip6h = ipv6_hdr(skb);
        ip6h->hop_limit--;
        skb->tstamp = 0;

        if (unlikely(dst_xfrm(&rt->dst))) {
                memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
                IP6CB(skb)->iif = skb->dev->ifindex;
                IP6CB(skb)->flags = IP6SKB_FORWARDED;
                return nf_flow_xmit_xfrm(skb, state, &rt->dst);
        }

        skb->dev = outdev;
        nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
        skb_dst_set_noref(skb, &rt->dst);
        neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);

        return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);