1#include <linux/kernel.h> 2#include <linux/init.h> 3#include <linux/module.h> 4#include <linux/netfilter.h> 5#include <linux/rhashtable.h> 6#include <linux/netdevice.h> 7#include <net/netfilter/nf_tables.h> 8#include <net/netfilter/nf_flow_table.h> 9#include <net/netfilter/nf_conntrack.h> 10#include <net/netfilter/nf_conntrack_core.h> 11#include <net/netfilter/nf_conntrack_tuple.h> 12 13struct flow_offload_entry { 14 struct flow_offload flow; 15 struct nf_conn *ct; 16 struct rcu_head rcu_head; 17}; 18 19struct flow_offload * 20flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route) 21{ 22 struct flow_offload_entry *entry; 23 struct flow_offload *flow; 24 25 if (unlikely(nf_ct_is_dying(ct) || 26 !atomic_inc_not_zero(&ct->ct_general.use))) 27 return NULL; 28 29 entry = kzalloc(sizeof(*entry), GFP_ATOMIC); 30 if (!entry) 31 goto err_ct_refcnt; 32 33 flow = &entry->flow; 34 35 if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst)) 36 goto err_dst_cache_original; 37 38 if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst)) 39 goto err_dst_cache_reply; 40 41 entry->ct = ct; 42 43 switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) { 44 case NFPROTO_IPV4: 45 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 = 46 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in; 47 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 = 48 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in; 49 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 = 50 ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in; 51 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 = 52 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in; 53 break; 54 case NFPROTO_IPV6: 55 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 = 56 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6; 57 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 = 58 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6; 59 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 = 60 ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6; 61 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 = 62 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6; 63 break; 64 } 65 66 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto = 67 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; 68 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto = 69 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; 70 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto = 71 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; 72 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto = 73 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; 74 75 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache = 76 route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst; 77 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache = 78 route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst; 79 80 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port = 81 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port; 82 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port = 83 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; 84 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port = 85 ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port; 86 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port = 87 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; 88 89 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir = 90 FLOW_OFFLOAD_DIR_ORIGINAL; 91 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir = 92 FLOW_OFFLOAD_DIR_REPLY; 93 94 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx = 95 route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex; 96 flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx = 97 route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex; 98 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx = 99 route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex; 100 flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx = 101 route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex; 102 103 if (ct->status & IPS_SRC_NAT) 104 flow->flags |= FLOW_OFFLOAD_SNAT; 105 else if (ct->status & IPS_DST_NAT) 106 flow->flags |= FLOW_OFFLOAD_DNAT; 107 108 return flow; 109 110err_dst_cache_reply: 111 dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst); 112err_dst_cache_original: 113 kfree(entry); 114err_ct_refcnt: 115 nf_ct_put(ct); 116 117 return NULL; 118} 119EXPORT_SYMBOL_GPL(flow_offload_alloc); 120 121void flow_offload_free(struct flow_offload *flow) 122{ 123 struct flow_offload_entry *e; 124 125 dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache); 126 dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache); 127 e = container_of(flow, struct flow_offload_entry, flow); 128 nf_ct_delete(e->ct, 0, 0); 129 nf_ct_put(e->ct); 130 kfree_rcu(e, rcu_head); 131} 132EXPORT_SYMBOL_GPL(flow_offload_free); 133 134void flow_offload_dead(struct flow_offload *flow) 135{ 136 flow->flags |= FLOW_OFFLOAD_DYING; 137} 138EXPORT_SYMBOL_GPL(flow_offload_dead); 139 140int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) 141{ 142 flow->timeout = (u32)jiffies; 143 144 rhashtable_insert_fast(&flow_table->rhashtable, 145 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, 146 *flow_table->type->params); 147 rhashtable_insert_fast(&flow_table->rhashtable, 148 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, 149 *flow_table->type->params); 150 return 0; 151} 152EXPORT_SYMBOL_GPL(flow_offload_add); 153 154static void flow_offload_del(struct nf_flowtable *flow_table, 155 struct flow_offload *flow) 156{ 157 rhashtable_remove_fast(&flow_table->rhashtable, 158 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, 159 *flow_table->type->params); 160 rhashtable_remove_fast(&flow_table->rhashtable, 161 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, 162 *flow_table->type->params); 163 164 flow_offload_free(flow); 165} 166 167struct flow_offload_tuple_rhash * 168flow_offload_lookup(struct nf_flowtable *flow_table, 169 struct flow_offload_tuple *tuple) 170{ 171 return rhashtable_lookup_fast(&flow_table->rhashtable, tuple, 172 *flow_table->type->params); 173} 174EXPORT_SYMBOL_GPL(flow_offload_lookup); 175 176int nf_flow_table_iterate(struct nf_flowtable *flow_table, 177 void (*iter)(struct flow_offload *flow, void *data), 178 void *data) 179{ 180 struct flow_offload_tuple_rhash *tuplehash; 181 struct rhashtable_iter hti; 182 struct flow_offload *flow; 183 int err; 184 185 err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL); 186 if (err) 187 return err; 188 189 rhashtable_walk_start(&hti); 190 191 while ((tuplehash = rhashtable_walk_next(&hti))) { 192 if (IS_ERR(tuplehash)) { 193 err = PTR_ERR(tuplehash); 194 if (err != -EAGAIN) 195 goto out; 196 197 continue; 198 } 199 if (tuplehash->tuple.dir) 200 continue; 201 202 flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); 203 204 iter(flow, data); 205 } 206out: 207 rhashtable_walk_stop(&hti); 208 rhashtable_walk_exit(&hti); 209 210 return err; 211} 212EXPORT_SYMBOL_GPL(nf_flow_table_iterate); 213 214static inline bool nf_flow_has_expired(const struct flow_offload *flow) 215{ 216 return (__s32)(flow->timeout - (u32)jiffies) <= 0; 217} 218 219static inline bool nf_flow_is_dying(const struct flow_offload *flow) 220{ 221 return flow->flags & FLOW_OFFLOAD_DYING; 222} 223 224static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table) 225{ 226 struct flow_offload_tuple_rhash *tuplehash; 227 struct rhashtable_iter hti; 228 struct flow_offload *flow; 229 int err; 230 231 err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL); 232 if (err) 233 return 0; 234 235 rhashtable_walk_start(&hti); 236 237 while ((tuplehash = rhashtable_walk_next(&hti))) { 238 if (IS_ERR(tuplehash)) { 239 err = PTR_ERR(tuplehash); 240 if (err != -EAGAIN) 241 goto out; 242 243 continue; 244 } 245 if (tuplehash->tuple.dir) 246 continue; 247 248 flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); 249 250 if (nf_flow_has_expired(flow) || 251 nf_flow_is_dying(flow)) 252 flow_offload_del(flow_table, flow); 253 } 254out: 255 rhashtable_walk_stop(&hti); 256 rhashtable_walk_exit(&hti); 257 258 return 1; 259} 260 261void nf_flow_offload_work_gc(struct work_struct *work) 262{ 263 struct nf_flowtable *flow_table; 264 265 flow_table = container_of(work, struct nf_flowtable, gc_work.work); 266 nf_flow_offload_gc_step(flow_table); 267 queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); 268} 269EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc); 270 271static u32 flow_offload_hash(const void *data, u32 len, u32 seed) 272{ 273 const struct flow_offload_tuple *tuple = data; 274 275 return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed); 276} 277 278static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed) 279{ 280 const struct flow_offload_tuple_rhash *tuplehash = data; 281 282 return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed); 283} 284 285static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, 286 const void *ptr) 287{ 288 const struct flow_offload_tuple *tuple = arg->key; 289 const struct flow_offload_tuple_rhash *x = ptr; 290 291 if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir))) 292 return 1; 293 294 return 0; 295} 296 297const struct rhashtable_params nf_flow_offload_rhash_params = { 298 .head_offset = offsetof(struct flow_offload_tuple_rhash, node), 299 .hashfn = flow_offload_hash, 300 .obj_hashfn = flow_offload_hash_obj, 301 .obj_cmpfn = flow_offload_hash_cmp, 302 .automatic_shrinking = true, 303}; 304EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params); 305 306static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, 307 __be16 port, __be16 new_port) 308{ 309 struct tcphdr *tcph; 310 311 if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || 312 skb_try_make_writable(skb, thoff + sizeof(*tcph))) 313 return -1; 314 315 tcph = (void *)(skb_network_header(skb) + thoff); 316 inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true); 317 318 return 0; 319} 320 321static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, 322 __be16 port, __be16 new_port) 323{ 324 struct udphdr *udph; 325 326 if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || 327 skb_try_make_writable(skb, thoff + sizeof(*udph))) 328 return -1; 329 330 udph = (void *)(skb_network_header(skb) + thoff); 331 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { 332 inet_proto_csum_replace2(&udph->check, skb, port, 333 new_port, true); 334 if (!udph->check) 335 udph->check = CSUM_MANGLED_0; 336 } 337 338 return 0; 339} 340 341static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, 342 u8 protocol, __be16 port, __be16 new_port) 343{ 344 switch (protocol) { 345 case IPPROTO_TCP: 346 if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0) 347 return NF_DROP; 348 break; 349 case IPPROTO_UDP: 350 if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0) 351 return NF_DROP; 352 break; 353 } 354 355 return 0; 356} 357 358int nf_flow_snat_port(const struct flow_offload *flow, 359 struct sk_buff *skb, unsigned int thoff, 360 u8 protocol, enum flow_offload_tuple_dir dir) 361{ 362 struct flow_ports *hdr; 363 __be16 port, new_port; 364 365 if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || 366 skb_try_make_writable(skb, thoff + sizeof(*hdr))) 367 return -1; 368 369 hdr = (void *)(skb_network_header(skb) + thoff); 370 371 switch (dir) { 372 case FLOW_OFFLOAD_DIR_ORIGINAL: 373 port = hdr->source; 374 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port; 375 hdr->source = new_port; 376 break; 377 case FLOW_OFFLOAD_DIR_REPLY: 378 port = hdr->dest; 379 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port; 380 hdr->dest = new_port; 381 break; 382 default: 383 return -1; 384 } 385 386 return nf_flow_nat_port(skb, thoff, protocol, port, new_port); 387} 388EXPORT_SYMBOL_GPL(nf_flow_snat_port); 389 390int nf_flow_dnat_port(const struct flow_offload *flow, 391 struct sk_buff *skb, unsigned int thoff, 392 u8 protocol, enum flow_offload_tuple_dir dir) 393{ 394 struct flow_ports *hdr; 395 __be16 port, new_port; 396 397 if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || 398 skb_try_make_writable(skb, thoff + sizeof(*hdr))) 399 return -1; 400 401 hdr = (void *)(skb_network_header(skb) + thoff); 402 403 switch (dir) { 404 case FLOW_OFFLOAD_DIR_ORIGINAL: 405 port = hdr->dest; 406 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port; 407 hdr->dest = new_port; 408 break; 409 case FLOW_OFFLOAD_DIR_REPLY: 410 port = hdr->source; 411 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port; 412 hdr->source = new_port; 413 break; 414 default: 415 return -1; 416 } 417 418 return nf_flow_nat_port(skb, thoff, protocol, port, new_port); 419} 420EXPORT_SYMBOL_GPL(nf_flow_dnat_port); 421 422static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data) 423{ 424 struct net_device *dev = data; 425 426 if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex) 427 return; 428 429 flow_offload_dead(flow); 430} 431 432static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable, 433 void *data) 434{ 435 nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data); 436 flush_delayed_work(&flowtable->gc_work); 437} 438 439void nf_flow_table_cleanup(struct net *net, struct net_device *dev) 440{ 441 nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev); 442} 443EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); 444 445void nf_flow_table_free(struct nf_flowtable *flow_table) 446{ 447 nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); 448 WARN_ON(!nf_flow_offload_gc_step(flow_table)); 449} 450EXPORT_SYMBOL_GPL(nf_flow_table_free); 451 452MODULE_LICENSE("GPL"); 453MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); 454