/* linux/net/bridge/netfilter/nf_conntrack_bridge.c */
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#include <linux/types.h>
   3#include <linux/ip.h>
   4#include <linux/netfilter.h>
   5#include <linux/netfilter_ipv6.h>
   6#include <linux/netfilter_bridge.h>
   7#include <linux/module.h>
   8#include <linux/skbuff.h>
   9#include <linux/icmp.h>
  10#include <linux/sysctl.h>
  11#include <net/route.h>
  12#include <net/ip.h>
  13
  14#include <net/netfilter/nf_conntrack.h>
  15#include <net/netfilter/nf_conntrack_core.h>
  16#include <net/netfilter/nf_conntrack_helper.h>
  17#include <net/netfilter/nf_conntrack_bridge.h>
  18
  19#include <linux/netfilter/nf_tables.h>
  20#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
  21#include <net/netfilter/nf_tables.h>
  22
  23#include "../br_private.h"
  24
  25/* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff
  26 * has been linearized or cloned.
  27 */
  28static int nf_br_ip_fragment(struct net *net, struct sock *sk,
  29                             struct sk_buff *skb,
  30                             struct nf_ct_bridge_frag_data *data,
  31                             int (*output)(struct net *, struct sock *sk,
  32                                           const struct nf_ct_bridge_frag_data *data,
  33                                           struct sk_buff *))
  34{
  35        int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
  36        unsigned int hlen, ll_rs, mtu;
  37        struct ip_frag_state state;
  38        struct iphdr *iph;
  39        int err;
  40
  41        /* for offloaded checksums cleanup checksum before fragmentation */
  42        if (skb->ip_summed == CHECKSUM_PARTIAL &&
  43            (err = skb_checksum_help(skb)))
  44                goto blackhole;
  45
  46        iph = ip_hdr(skb);
  47
  48        /*
  49         *      Setup starting values
  50         */
  51
  52        hlen = iph->ihl * 4;
  53        frag_max_size -= hlen;
  54        ll_rs = LL_RESERVED_SPACE(skb->dev);
  55        mtu = skb->dev->mtu;
  56
  57        if (skb_has_frag_list(skb)) {
  58                unsigned int first_len = skb_pagelen(skb);
  59                struct ip_fraglist_iter iter;
  60                struct sk_buff *frag;
  61
  62                if (first_len - hlen > mtu ||
  63                    skb_headroom(skb) < ll_rs)
  64                        goto blackhole;
  65
  66                if (skb_cloned(skb))
  67                        goto slow_path;
  68
  69                skb_walk_frags(skb, frag) {
  70                        if (frag->len > mtu ||
  71                            skb_headroom(frag) < hlen + ll_rs)
  72                                goto blackhole;
  73
  74                        if (skb_shared(frag))
  75                                goto slow_path;
  76                }
  77
  78                ip_fraglist_init(skb, iph, hlen, &iter);
  79
  80                for (;;) {
  81                        if (iter.frag)
  82                                ip_fraglist_prepare(skb, &iter);
  83
  84                        err = output(net, sk, data, skb);
  85                        if (err || !iter.frag)
  86                                break;
  87
  88                        skb = ip_fraglist_next(&iter);
  89                }
  90                return err;
  91        }
  92slow_path:
  93        /* This is a linearized skbuff, the original geometry is lost for us.
  94         * This may also be a clone skbuff, we could preserve the geometry for
  95         * the copies but probably not worth the effort.
  96         */
  97        ip_frag_init(skb, hlen, ll_rs, frag_max_size, &state);
  98
  99        while (state.left > 0) {
 100                struct sk_buff *skb2;
 101
 102                skb2 = ip_frag_next(skb, &state);
 103                if (IS_ERR(skb2)) {
 104                        err = PTR_ERR(skb2);
 105                        goto blackhole;
 106                }
 107
 108                err = output(net, sk, data, skb2);
 109                if (err)
 110                        goto blackhole;
 111        }
 112        consume_skb(skb);
 113        return err;
 114
 115blackhole:
 116        kfree_skb(skb);
 117        return 0;
 118}
 119
 120/* ip_defrag() expects IPCB() in place. */
 121static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb,
 122                           size_t inet_skb_parm_size)
 123{
 124        memcpy(cb, skb->cb, sizeof(*cb));
 125        memset(skb->cb, 0, inet_skb_parm_size);
 126}
 127
 128static void br_skb_cb_restore(struct sk_buff *skb,
 129                              const struct br_input_skb_cb *cb,
 130                              u16 fragsz)
 131{
 132        memcpy(skb->cb, cb, sizeof(*cb));
 133        BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz;
 134}
 135
 136static unsigned int nf_ct_br_defrag4(struct sk_buff *skb,
 137                                     const struct nf_hook_state *state)
 138{
 139        u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
 140        enum ip_conntrack_info ctinfo;
 141        struct br_input_skb_cb cb;
 142        const struct nf_conn *ct;
 143        int err;
 144
 145        if (!ip_is_fragment(ip_hdr(skb)))
 146                return NF_ACCEPT;
 147
 148        ct = nf_ct_get(skb, &ctinfo);
 149        if (ct)
 150                zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
 151
 152        br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm));
 153        local_bh_disable();
 154        err = ip_defrag(state->net, skb,
 155                        IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
 156        local_bh_enable();
 157        if (!err) {
 158                br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size);
 159                skb->ignore_df = 1;
 160                return NF_ACCEPT;
 161        }
 162
 163        return NF_STOLEN;
 164}
 165
 166static unsigned int nf_ct_br_defrag6(struct sk_buff *skb,
 167                                     const struct nf_hook_state *state)
 168{
 169        u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
 170        enum ip_conntrack_info ctinfo;
 171        struct br_input_skb_cb cb;
 172        const struct nf_conn *ct;
 173        int err;
 174
 175        ct = nf_ct_get(skb, &ctinfo);
 176        if (ct)
 177                zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
 178
 179        br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm));
 180
 181        err = nf_ipv6_br_defrag(state->net, skb,
 182                                IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
 183        /* queued */
 184        if (err == -EINPROGRESS)
 185                return NF_STOLEN;
 186
 187        br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size);
 188        return err == 0 ? NF_ACCEPT : NF_DROP;
 189}
 190
 191static int nf_ct_br_ip_check(const struct sk_buff *skb)
 192{
 193        const struct iphdr *iph;
 194        int nhoff, len;
 195
 196        nhoff = skb_network_offset(skb);
 197        iph = ip_hdr(skb);
 198        if (iph->ihl < 5 ||
 199            iph->version != 4)
 200                return -1;
 201
 202        len = ntohs(iph->tot_len);
 203        if (skb->len < nhoff + len ||
 204            len < (iph->ihl * 4))
 205                return -1;
 206
 207        return 0;
 208}
 209
 210static int nf_ct_br_ipv6_check(const struct sk_buff *skb)
 211{
 212        const struct ipv6hdr *hdr;
 213        int nhoff, len;
 214
 215        nhoff = skb_network_offset(skb);
 216        hdr = ipv6_hdr(skb);
 217        if (hdr->version != 6)
 218                return -1;
 219
 220        len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff;
 221        if (skb->len < len)
 222                return -1;
 223
 224        return 0;
 225}
 226
/* NF_BR_PRE_ROUTING hook: validate and defragment IPv4/IPv6 packets seen
 * by the bridge, then feed them to conntrack.
 *
 * Returns NF_ACCEPT for non-IP or already-tracked traffic (and for
 * packets we cannot parse, which pass through untracked), NF_STOLEN
 * while fragments are queued, or the verdict from the defrag helpers /
 * nf_conntrack_in().
 */
static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	struct nf_hook_state bridge_state = *state;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	u32 len;
	int ret;

	ct = nf_ct_get(skb, &ctinfo);
	/* Already has a real conntrack entry or is explicitly untracked:
	 * nothing to do. Templates still need to be resolved below.
	 */
	if ((ct && !nf_ct_is_template(ct)) ||
	    ctinfo == IP_CT_UNTRACKED)
		return NF_ACCEPT;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
			return NF_ACCEPT;

		/* Trim padding beyond the IP total length before header
		 * sanity checks and defragmentation.
		 */
		len = ntohs(ip_hdr(skb)->tot_len);
		if (pskb_trim_rcsum(skb, len))
			return NF_ACCEPT;

		if (nf_ct_br_ip_check(skb))
			return NF_ACCEPT;

		/* conntrack expects an L3 family, not NFPROTO_BRIDGE. */
		bridge_state.pf = NFPROTO_IPV4;
		ret = nf_ct_br_defrag4(skb, &bridge_state);
		break;
	case htons(ETH_P_IPV6):
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			return NF_ACCEPT;

		len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
		if (pskb_trim_rcsum(skb, len))
			return NF_ACCEPT;

		if (nf_ct_br_ipv6_check(skb))
			return NF_ACCEPT;

		bridge_state.pf = NFPROTO_IPV6;
		ret = nf_ct_br_defrag6(skb, &bridge_state);
		break;
	default:
		/* Not IP traffic: mark untracked so we don't look again. */
		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
		return NF_ACCEPT;
	}

	/* Fragment was queued (NF_STOLEN) or defrag issued a verdict. */
	if (ret != NF_ACCEPT)
		return ret;

	return nf_conntrack_in(skb, &bridge_state);
}
 280
 281static void nf_ct_bridge_frag_save(struct sk_buff *skb,
 282                                   struct nf_ct_bridge_frag_data *data)
 283{
 284        if (skb_vlan_tag_present(skb)) {
 285                data->vlan_present = true;
 286                data->vlan_tci = skb->vlan_tci;
 287                data->vlan_proto = skb->vlan_proto;
 288        } else {
 289                data->vlan_present = false;
 290        }
 291        skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
 292}
 293
/* Re-fragment a packet that was defragmented on ingress before it leaves
 * the bridge. frag_max_size recorded during defrag selects the fragment
 * size; the saved link-layer header is replayed on each fragment by
 * @output.
 *
 * Returns NF_ACCEPT when no refragmentation is needed, NF_STOLEN once
 * the fragmenters have taken ownership of the skb, NF_DROP on an
 * unexpected protocol.
 */
static unsigned int
nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
		    int (*output)(struct net *, struct sock *sk,
				  const struct nf_ct_bridge_frag_data *data,
				  struct sk_buff *))
{
	struct nf_ct_bridge_frag_data data;

	/* Zero means the packet never went through defrag: send as-is. */
	if (!BR_INPUT_SKB_CB(skb)->frag_max_size)
		return NF_ACCEPT;

	nf_ct_bridge_frag_save(skb, &data);
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		nf_br_ip_fragment(state->net, state->sk, skb, &data, output);
		break;
	case htons(ETH_P_IPV6):
		nf_br_ip6_fragment(state->net, state->sk, skb, &data, output);
		break;
	default:
		/* Only IP traffic can have frag_max_size set. */
		WARN_ON_ONCE(1);
		return NF_DROP;
	}

	return NF_STOLEN;
}
 320
 321/* Actually only slow path refragmentation needs this. */
 322static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
 323                                     const struct nf_ct_bridge_frag_data *data)
 324{
 325        int err;
 326
 327        err = skb_cow_head(skb, ETH_HLEN);
 328        if (err) {
 329                kfree_skb(skb);
 330                return -ENOMEM;
 331        }
 332        if (data->vlan_present)
 333                __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
 334        else if (skb_vlan_tag_present(skb))
 335                __vlan_hwaccel_clear_tag(skb);
 336
 337        skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
 338        skb_reset_mac_header(skb);
 339
 340        return 0;
 341}
 342
 343static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
 344                                    const struct nf_ct_bridge_frag_data *data,
 345                                    struct sk_buff *skb)
 346{
 347        int err;
 348
 349        err = nf_ct_bridge_frag_restore(skb, data);
 350        if (err < 0)
 351                return err;
 352
 353        return br_dev_queue_push_xmit(net, sk, skb);
 354}
 355
/* Confirm the conntrack entry on egress. For tracked packets we locate
 * the transport header offset so nf_confirm() can also apply sequence
 * adjustments; related replies and untracked packets take the plain
 * confirm path.
 */
static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	int protoff;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
		return nf_conntrack_confirm(skb);

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		protoff = skb_network_offset(skb) + ip_hdrlen(skb);
		break;
	case htons(ETH_P_IPV6): {
		 unsigned char pnum = ipv6_hdr(skb)->nexthdr;
		__be16 frag_off;

		/* Walk extension headers to the transport header; bail to
		 * plain confirm on parse failure or non-first fragments
		 * (nonzero fragment offset).
		 */
		protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
					   &frag_off);
		if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
			return nf_conntrack_confirm(skb);
		}
		break;
	default:
		return NF_ACCEPT;
	}
	return nf_confirm(skb, protoff, ct, ctinfo);
}
 385
 386static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb,
 387                                      const struct nf_hook_state *state)
 388{
 389        int ret;
 390
 391        ret = nf_ct_bridge_confirm(skb);
 392        if (ret != NF_ACCEPT)
 393                return ret;
 394
 395        return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post);
 396}
 397
/* Bridge conntrack hooks: track on PRE_ROUTING, confirm/refragment on
 * POST_ROUTING, at the standard conntrack priorities.
 */
static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
	{
		.hook		= nf_ct_bridge_pre,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_PRE_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	{
		.hook		= nf_ct_bridge_post,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_POST_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
};
 412
/* Registration descriptor handed to the conntrack core. */
static struct nf_ct_bridge_info bridge_info = {
	.ops		= nf_ct_bridge_hook_ops,
	.ops_size	= ARRAY_SIZE(nf_ct_bridge_hook_ops),
	.me		= THIS_MODULE,
};
 418
 419static int __init nf_conntrack_l3proto_bridge_init(void)
 420{
 421        nf_ct_bridge_register(&bridge_info);
 422
 423        return 0;
 424}
 425
 426static void __exit nf_conntrack_l3proto_bridge_fini(void)
 427{
 428        nf_ct_bridge_unregister(&bridge_info);
 429}
 430
module_init(nf_conntrack_l3proto_bridge_init);
module_exit(nf_conntrack_l3proto_bridge_fini);

/* Autoload when conntrack support for AF_BRIDGE is requested. */
MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE));
MODULE_LICENSE("GPL");
 436