/* linux/net/bridge/netfilter/nf_conntrack_bridge.c */
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#include <linux/types.h>
   3#include <linux/ip.h>
   4#include <linux/netfilter.h>
   5#include <linux/netfilter_ipv6.h>
   6#include <linux/netfilter_bridge.h>
   7#include <linux/module.h>
   8#include <linux/skbuff.h>
   9#include <linux/icmp.h>
  10#include <linux/sysctl.h>
  11#include <net/route.h>
  12#include <net/ip.h>
  13
  14#include <net/netfilter/nf_conntrack.h>
  15#include <net/netfilter/nf_conntrack_core.h>
  16#include <net/netfilter/nf_conntrack_helper.h>
  17#include <net/netfilter/nf_conntrack_bridge.h>
  18
  19#include <linux/netfilter/nf_tables.h>
  20#include <net/netfilter/nf_tables.h>
  21
  22#include "../br_private.h"
  23
/* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff
 * has been linearized or cloned.
 *
 * Splits @skb into IPv4 fragments no larger than the frag_max_size recorded
 * in the bridge control block and hands each fragment to @output together
 * with @data (the saved link-layer information).  Returns 0 on success or a
 * negative errno from @output; on unrecoverable local errors the skb is
 * freed and 0 is returned ("blackhole").
 */
static int nf_br_ip_fragment(struct net *net, struct sock *sk,
                             struct sk_buff *skb,
                             struct nf_bridge_frag_data *data,
                             int (*output)(struct net *, struct sock *sk,
                                           const struct nf_bridge_frag_data *data,
                                           struct sk_buff *))
{
        int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
        unsigned int hlen, ll_rs, mtu;
        ktime_t tstamp = skb->tstamp;   /* saved; restored on every fragment */
        struct ip_frag_state state;
        struct iphdr *iph;
        int err;

        /* for offloaded checksums cleanup checksum before fragmentation */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_checksum_help(skb)))
                goto blackhole;

        iph = ip_hdr(skb);

        /*
         *      Setup starting values
         */

        hlen = iph->ihl * 4;
        frag_max_size -= hlen;          /* payload budget per fragment */
        ll_rs = LL_RESERVED_SPACE(skb->dev);
        mtu = skb->dev->mtu;

        if (skb_has_frag_list(skb)) {
                unsigned int first_len = skb_pagelen(skb);
                struct ip_fraglist_iter iter;
                struct sk_buff *frag;

                /* Fast path: reuse the existing frag list geometry.  Every
                 * fragment must fit the device MTU and leave room for the
                 * link-layer header, else give up (or fall to slow path).
                 */
                if (first_len - hlen > mtu ||
                    skb_headroom(skb) < ll_rs)
                        goto blackhole;

                if (skb_cloned(skb))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        if (frag->len > mtu ||
                            skb_headroom(frag) < hlen + ll_rs)
                                goto blackhole;

                        /* shared frags must not be modified in place */
                        if (skb_shared(frag))
                                goto slow_path;
                }

                ip_fraglist_init(skb, iph, hlen, &iter);

                for (;;) {
                        if (iter.frag)
                                ip_fraglist_prepare(skb, &iter);

                        skb->tstamp = tstamp;   /* keep original timestamp */
                        err = output(net, sk, data, skb);
                        if (err || !iter.frag)
                                break;

                        skb = ip_fraglist_next(&iter);
                }

                if (!err)
                        return 0;

                /* @output failed: free the not-yet-transmitted fragments */
                kfree_skb_list(iter.frag);

                return err;
        }
slow_path:
        /* This is a linearized skbuff, the original geometry is lost for us.
         * This may also be a clone skbuff, we could preserve the geometry for
         * the copies but probably not worth the effort.
         */
        ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state);

        while (state.left > 0) {
                struct sk_buff *skb2;

                skb2 = ip_frag_next(skb, &state);
                if (IS_ERR(skb2)) {
                        err = PTR_ERR(skb2);
                        goto blackhole;
                }

                skb2->tstamp = tstamp;
                err = output(net, sk, data, skb2);
                if (err)
                        goto blackhole;
        }
        /* all fragments handed off; release the original skb */
        consume_skb(skb);
        return err;

blackhole:
        kfree_skb(skb);
        return 0;
}
 127
 128/* ip_defrag() expects IPCB() in place. */
 129static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb,
 130                           size_t inet_skb_parm_size)
 131{
 132        memcpy(cb, skb->cb, sizeof(*cb));
 133        memset(skb->cb, 0, inet_skb_parm_size);
 134}
 135
 136static void br_skb_cb_restore(struct sk_buff *skb,
 137                              const struct br_input_skb_cb *cb,
 138                              u16 fragsz)
 139{
 140        memcpy(skb->cb, cb, sizeof(*cb));
 141        BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz;
 142}
 143
/* Defragment an IPv4 packet before it is handed to conntrack.
 *
 * Returns NF_ACCEPT when the packet is not a fragment or was fully
 * reassembled; NF_STOLEN when ip_defrag() queued or consumed the skb.
 */
static unsigned int nf_ct_br_defrag4(struct sk_buff *skb,
                                     const struct nf_hook_state *state)
{
        u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
        enum ip_conntrack_info ctinfo;
        struct br_input_skb_cb cb;
        const struct nf_conn *ct;
        int err;

        if (!ip_is_fragment(ip_hdr(skb)))
                return NF_ACCEPT;

        /* a conntrack template attached to the skb may select a zone */
        ct = nf_ct_get(skb, &ctinfo);
        if (ct)
                zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));

        /* ip_defrag() expects a zeroed IPCB(); preserve the bridge cb */
        br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm));
        local_bh_disable();
        err = ip_defrag(state->net, skb,
                        IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
        local_bh_enable();
        if (!err) {
                /* reassembled: restore the bridge cb and remember the
                 * largest fragment size for refragmentation on egress
                 */
                br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size);
                skb->ignore_df = 1;
                return NF_ACCEPT;
        }

        return NF_STOLEN;
}
 173
/* Defragment an IPv6 packet before it is handed to conntrack.
 *
 * Returns NF_ACCEPT on a reassembled packet, NF_STOLEN when the fragment
 * was queued, NF_DROP on error.  Without CONFIG_NF_DEFRAG_IPV6 this is a
 * no-op that accepts the packet.
 */
static unsigned int nf_ct_br_defrag6(struct sk_buff *skb,
                                     const struct nf_hook_state *state)
{
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
        u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
        enum ip_conntrack_info ctinfo;
        struct br_input_skb_cb cb;
        const struct nf_conn *ct;
        int err;

        /* a conntrack template attached to the skb may select a zone */
        ct = nf_ct_get(skb, &ctinfo);
        if (ct)
                zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));

        /* nf_ct_frag6_gather() expects a zeroed IP6CB(); save bridge cb */
        br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm));

        err = nf_ct_frag6_gather(state->net, skb,
                                 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
        /* queued */
        if (err == -EINPROGRESS)
                return NF_STOLEN;

        br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size);
        return err == 0 ? NF_ACCEPT : NF_DROP;
#else
        return NF_ACCEPT;
#endif
}
 202
 203static int nf_ct_br_ip_check(const struct sk_buff *skb)
 204{
 205        const struct iphdr *iph;
 206        int nhoff, len;
 207
 208        nhoff = skb_network_offset(skb);
 209        iph = ip_hdr(skb);
 210        if (iph->ihl < 5 ||
 211            iph->version != 4)
 212                return -1;
 213
 214        len = ntohs(iph->tot_len);
 215        if (skb->len < nhoff + len ||
 216            len < (iph->ihl * 4))
 217                return -1;
 218
 219        return 0;
 220}
 221
 222static int nf_ct_br_ipv6_check(const struct sk_buff *skb)
 223{
 224        const struct ipv6hdr *hdr;
 225        int nhoff, len;
 226
 227        nhoff = skb_network_offset(skb);
 228        hdr = ipv6_hdr(skb);
 229        if (hdr->version != 6)
 230                return -1;
 231
 232        len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff;
 233        if (skb->len < len)
 234                return -1;
 235
 236        return 0;
 237}
 238
/* Bridge PREROUTING hook: validate, trim and defragment IPv4/IPv6 packets,
 * then run them through nf_conntrack_in() with the hook state's protocol
 * family switched to the corresponding inet family.  Non-IP traffic is
 * marked untracked.
 */
static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
                                     const struct nf_hook_state *state)
{
        struct nf_hook_state bridge_state = *state;
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct;
        u32 len;
        int ret;

        ct = nf_ct_get(skb, &ctinfo);
        /* already tracked (non-template) or explicitly untracked: skip */
        if ((ct && !nf_ct_is_template(ct)) ||
            ctinfo == IP_CT_UNTRACKED)
                return NF_ACCEPT;

        switch (skb->protocol) {
        case htons(ETH_P_IP):
                if (!pskb_may_pull(skb, sizeof(struct iphdr)))
                        return NF_ACCEPT;

                /* trim any link-layer padding beyond the IP total length */
                len = ntohs(ip_hdr(skb)->tot_len);
                if (pskb_trim_rcsum(skb, len))
                        return NF_ACCEPT;

                if (nf_ct_br_ip_check(skb))
                        return NF_ACCEPT;

                bridge_state.pf = NFPROTO_IPV4;
                ret = nf_ct_br_defrag4(skb, &bridge_state);
                break;
        case htons(ETH_P_IPV6):
                if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
                        return NF_ACCEPT;

                /* trim any link-layer padding beyond the IPv6 length */
                len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
                if (pskb_trim_rcsum(skb, len))
                        return NF_ACCEPT;

                if (nf_ct_br_ipv6_check(skb))
                        return NF_ACCEPT;

                bridge_state.pf = NFPROTO_IPV6;
                ret = nf_ct_br_defrag6(skb, &bridge_state);
                break;
        default:
                nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
                return NF_ACCEPT;
        }

        /* NF_STOLEN (fragment queued) or NF_DROP from defrag ends here */
        if (ret != NF_ACCEPT)
                return ret;

        return nf_conntrack_in(skb, &bridge_state);
}
 292
 293static void nf_ct_bridge_frag_save(struct sk_buff *skb,
 294                                   struct nf_bridge_frag_data *data)
 295{
 296        if (skb_vlan_tag_present(skb)) {
 297                data->vlan_present = true;
 298                data->vlan_tci = skb->vlan_tci;
 299                data->vlan_proto = skb->vlan_proto;
 300        } else {
 301                data->vlan_present = false;
 302        }
 303        skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
 304}
 305
/* Refragment a packet that was reassembled on ingress, preserving the
 * original geometry recorded in frag_max_size.  The saved link-layer
 * header is captured into @data and @output is invoked per fragment.
 *
 * Returns NF_ACCEPT when no refragmentation is needed, NF_STOLEN once the
 * skb has been handed to the fragmentation path, or NF_DROP for an
 * unexpected protocol.
 */
static unsigned int
nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
                    int (*output)(struct net *, struct sock *sk,
                                  const struct nf_bridge_frag_data *data,
                                  struct sk_buff *))
{
        struct nf_bridge_frag_data data;

        /* zero frag_max_size means the packet was never defragmented */
        if (!BR_INPUT_SKB_CB(skb)->frag_max_size)
                return NF_ACCEPT;

        nf_ct_bridge_frag_save(skb, &data);
        switch (skb->protocol) {
        case htons(ETH_P_IP):
                nf_br_ip_fragment(state->net, state->sk, skb, &data, output);
                break;
        case htons(ETH_P_IPV6):
                nf_br_ip6_fragment(state->net, state->sk, skb, &data, output);
                break;
        default:
                /* only IP/IPv6 can have frag_max_size set */
                WARN_ON_ONCE(1);
                return NF_DROP;
        }

        return NF_STOLEN;
}
 332
 333/* Actually only slow path refragmentation needs this. */
 334static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
 335                                     const struct nf_bridge_frag_data *data)
 336{
 337        int err;
 338
 339        err = skb_cow_head(skb, ETH_HLEN);
 340        if (err) {
 341                kfree_skb(skb);
 342                return -ENOMEM;
 343        }
 344        if (data->vlan_present)
 345                __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
 346        else if (skb_vlan_tag_present(skb))
 347                __vlan_hwaccel_clear_tag(skb);
 348
 349        skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
 350        skb_reset_mac_header(skb);
 351
 352        return 0;
 353}
 354
 355static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
 356                                    const struct nf_bridge_frag_data *data,
 357                                    struct sk_buff *skb)
 358{
 359        int err;
 360
 361        err = nf_ct_bridge_frag_restore(skb, data);
 362        if (err < 0)
 363                return err;
 364
 365        return br_dev_queue_push_xmit(net, sk, skb);
 366}
 367
 368static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb)
 369{
 370        enum ip_conntrack_info ctinfo;
 371        struct nf_conn *ct;
 372        int protoff;
 373
 374        ct = nf_ct_get(skb, &ctinfo);
 375        if (!ct || ctinfo == IP_CT_RELATED_REPLY)
 376                return nf_conntrack_confirm(skb);
 377
 378        switch (skb->protocol) {
 379        case htons(ETH_P_IP):
 380                protoff = skb_network_offset(skb) + ip_hdrlen(skb);
 381                break;
 382        case htons(ETH_P_IPV6): {
 383                 unsigned char pnum = ipv6_hdr(skb)->nexthdr;
 384                __be16 frag_off;
 385
 386                protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
 387                                           &frag_off);
 388                if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
 389                        return nf_conntrack_confirm(skb);
 390                }
 391                break;
 392        default:
 393                return NF_ACCEPT;
 394        }
 395        return nf_confirm(skb, protoff, ct, ctinfo);
 396}
 397
 398static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb,
 399                                      const struct nf_hook_state *state)
 400{
 401        int ret;
 402
 403        ret = nf_ct_bridge_confirm(skb);
 404        if (ret != NF_ACCEPT)
 405                return ret;
 406
 407        return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post);
 408}
 409
/* Bridge conntrack hooks: track (and defragment) at prerouting, confirm
 * (and refragment) at postrouting.
 */
static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
        {
                .hook           = nf_ct_bridge_pre,
                .pf             = NFPROTO_BRIDGE,
                .hooknum        = NF_BR_PRE_ROUTING,
                .priority       = NF_IP_PRI_CONNTRACK,
        },
        {
                .hook           = nf_ct_bridge_post,
                .pf             = NFPROTO_BRIDGE,
                .hooknum        = NF_BR_POST_ROUTING,
                .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
        },
};
 424
/* Descriptor handed to nf_ct_bridge_register()/unregister(). */
static struct nf_ct_bridge_info bridge_info = {
        .ops            = nf_ct_bridge_hook_ops,
        .ops_size       = ARRAY_SIZE(nf_ct_bridge_hook_ops),
        .me             = THIS_MODULE,
};
 430
/* Module init: register the bridge conntrack hook info with the core. */
static int __init nf_conntrack_l3proto_bridge_init(void)
{
        nf_ct_bridge_register(&bridge_info);

        return 0;
}
 437
/* Module exit: unregister the bridge conntrack hook info. */
static void __exit nf_conntrack_l3proto_bridge_fini(void)
{
        nf_ct_bridge_unregister(&bridge_info);
}
 442
 443module_init(nf_conntrack_l3proto_bridge_init);
 444module_exit(nf_conntrack_l3proto_bridge_fini);
 445
 446MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE));
 447MODULE_LICENSE("GPL");
 448