linux/net/netfilter/xt_TCPMSS.c
<<
>>
Prefs
   1/*
   2 * This is a module which is used for setting the MSS option in TCP packets.
   3 *
   4 * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
   5 * Copyright (C) 2007 Patrick McHardy <kaber@trash.net>
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License version 2 as
   9 * published by the Free Software Foundation.
  10 */
  11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  12#include <linux/module.h>
  13#include <linux/skbuff.h>
  14#include <linux/ip.h>
  15#include <linux/gfp.h>
  16#include <linux/ipv6.h>
  17#include <linux/tcp.h>
  18#include <net/dst.h>
  19#include <net/flow.h>
  20#include <net/ipv6.h>
  21#include <net/route.h>
  22#include <net/tcp.h>
  23
  24#include <linux/netfilter_ipv4/ip_tables.h>
  25#include <linux/netfilter_ipv6/ip6_tables.h>
  26#include <linux/netfilter/x_tables.h>
  27#include <linux/netfilter/xt_tcpudp.h>
  28#include <linux/netfilter/xt_TCPMSS.h>
  29
/* Module metadata: license, author, description and alternate module names. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
MODULE_DESCRIPTION("Xtables: TCP Maximum Segment Size (MSS) adjustment");
/* NOTE(review): presumably kept so the per-family ipt_/ip6t_ names still resolve to this module - confirm */
MODULE_ALIAS("ipt_TCPMSS");
MODULE_ALIAS("ip6t_TCPMSS");
  35
  36static inline unsigned int
  37optlen(const u_int8_t *opt, unsigned int offset)
  38{
  39        /* Beware zero-length options: make finite progress */
  40        if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
  41                return 1;
  42        else
  43                return opt[offset+1];
  44}
  45
  46static u_int32_t tcpmss_reverse_mtu(struct net *net,
  47                                    const struct sk_buff *skb,
  48                                    unsigned int family)
  49{
  50        struct flowi fl;
  51        const struct nf_afinfo *ai;
  52        struct rtable *rt = NULL;
  53        u_int32_t mtu     = ~0U;
  54
  55        if (family == PF_INET) {
  56                struct flowi4 *fl4 = &fl.u.ip4;
  57                memset(fl4, 0, sizeof(*fl4));
  58                fl4->daddr = ip_hdr(skb)->saddr;
  59        } else {
  60                struct flowi6 *fl6 = &fl.u.ip6;
  61
  62                memset(fl6, 0, sizeof(*fl6));
  63                fl6->daddr = ipv6_hdr(skb)->saddr;
  64        }
  65        rcu_read_lock();
  66        ai = nf_get_afinfo(family);
  67        if (ai != NULL)
  68                ai->route(net, (struct dst_entry **)&rt, &fl, false);
  69        rcu_read_unlock();
  70
  71        if (rt != NULL) {
  72                mtu = dst_mtu(&rt->dst);
  73                dst_release(&rt->dst);
  74        }
  75        return mtu;
  76}
  77
  78static int
  79tcpmss_mangle_packet(struct sk_buff *skb,
  80                     const struct xt_action_param *par,
  81                     unsigned int family,
  82                     unsigned int tcphoff,
  83                     unsigned int minlen)
  84{
  85        const struct xt_tcpmss_info *info = par->targinfo;
  86        struct tcphdr *tcph;
  87        int len, tcp_hdrlen;
  88        unsigned int i;
  89        __be16 oldval;
  90        u16 newmss;
  91        u8 *opt;
  92
  93        /* This is a fragment, no TCP header is available */
  94        if (par->fragoff != 0)
  95                return 0;
  96
  97        if (!skb_make_writable(skb, skb->len))
  98                return -1;
  99
 100        len = skb->len - tcphoff;
 101        if (len < (int)sizeof(struct tcphdr))
 102                return -1;
 103
 104        tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
 105        tcp_hdrlen = tcph->doff * 4;
 106
 107        if (len < tcp_hdrlen)
 108                return -1;
 109
 110        if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
 111                struct net *net = dev_net(par->in ? par->in : par->out);
 112                unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family);
 113
 114                if (dst_mtu(skb_dst(skb)) <= minlen) {
 115                        net_err_ratelimited("unknown or invalid path-MTU (%u)\n",
 116                                            dst_mtu(skb_dst(skb)));
 117                        return -1;
 118                }
 119                if (in_mtu <= minlen) {
 120                        net_err_ratelimited("unknown or invalid path-MTU (%u)\n",
 121                                            in_mtu);
 122                        return -1;
 123                }
 124                newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen;
 125        } else
 126                newmss = info->mss;
 127
 128        opt = (u_int8_t *)tcph;
 129        for (i = sizeof(struct tcphdr); i <= tcp_hdrlen - TCPOLEN_MSS; i += optlen(opt, i)) {
 130                if (opt[i] == TCPOPT_MSS && opt[i+1] == TCPOLEN_MSS) {
 131                        u_int16_t oldmss;
 132
 133                        oldmss = (opt[i+2] << 8) | opt[i+3];
 134
 135                        /* Never increase MSS, even when setting it, as
 136                         * doing so results in problems for hosts that rely
 137                         * on MSS being set correctly.
 138                         */
 139                        if (oldmss <= newmss)
 140                                return 0;
 141
 142                        opt[i+2] = (newmss & 0xff00) >> 8;
 143                        opt[i+3] = newmss & 0x00ff;
 144
 145                        inet_proto_csum_replace2(&tcph->check, skb,
 146                                                 htons(oldmss), htons(newmss),
 147                                                 0);
 148                        return 0;
 149                }
 150        }
 151
 152        /* There is data after the header so the option can't be added
 153         * without moving it, and doing so may make the SYN packet
 154         * itself too large. Accept the packet unmodified instead.
 155         */
 156        if (len > tcp_hdrlen)
 157                return 0;
 158
 159        /*
 160         * MSS Option not found ?! add it..
 161         */
 162        if (skb_tailroom(skb) < TCPOLEN_MSS) {
 163                if (pskb_expand_head(skb, 0,
 164                                     TCPOLEN_MSS - skb_tailroom(skb),
 165                                     GFP_ATOMIC))
 166                        return -1;
 167                tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
 168        }
 169
 170        skb_put(skb, TCPOLEN_MSS);
 171
 172        /*
 173         * IPv4: RFC 1122 states "If an MSS option is not received at
 174         * connection setup, TCP MUST assume a default send MSS of 536".
 175         * IPv6: RFC 2460 states IPv6 has a minimum MTU of 1280 and a minimum
 176         * length IPv6 header of 60, ergo the default MSS value is 1220
 177         * Since no MSS was provided, we must use the default values
 178         */
 179        if (par->family == NFPROTO_IPV4)
 180                newmss = min(newmss, (u16)536);
 181        else
 182                newmss = min(newmss, (u16)1220);
 183
 184        opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
 185        memmove(opt + TCPOLEN_MSS, opt, len - sizeof(struct tcphdr));
 186
 187        inet_proto_csum_replace2(&tcph->check, skb,
 188                                 htons(len), htons(len + TCPOLEN_MSS), 1);
 189        opt[0] = TCPOPT_MSS;
 190        opt[1] = TCPOLEN_MSS;
 191        opt[2] = (newmss & 0xff00) >> 8;
 192        opt[3] = newmss & 0x00ff;
 193
 194        inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0);
 195
 196        oldval = ((__be16 *)tcph)[6];
 197        tcph->doff += TCPOLEN_MSS/4;
 198        inet_proto_csum_replace2(&tcph->check, skb,
 199                                 oldval, ((__be16 *)tcph)[6], 0);
 200        return TCPOLEN_MSS;
 201}
 202
 203static unsigned int
 204tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 205{
 206        struct iphdr *iph = ip_hdr(skb);
 207        __be16 newlen;
 208        int ret;
 209
 210        ret = tcpmss_mangle_packet(skb, par,
 211                                   PF_INET,
 212                                   iph->ihl * 4,
 213                                   sizeof(*iph) + sizeof(struct tcphdr));
 214        if (ret < 0)
 215                return NF_DROP;
 216        if (ret > 0) {
 217                iph = ip_hdr(skb);
 218                newlen = htons(ntohs(iph->tot_len) + ret);
 219                csum_replace2(&iph->check, iph->tot_len, newlen);
 220                iph->tot_len = newlen;
 221        }
 222        return XT_CONTINUE;
 223}
 224
 225#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 226static unsigned int
 227tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 228{
 229        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 230        u8 nexthdr;
 231        __be16 frag_off;
 232        int tcphoff;
 233        int ret;
 234
 235        nexthdr = ipv6h->nexthdr;
 236        tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr, &frag_off);
 237        if (tcphoff < 0)
 238                return NF_DROP;
 239        ret = tcpmss_mangle_packet(skb, par,
 240                                   PF_INET6,
 241                                   tcphoff,
 242                                   sizeof(*ipv6h) + sizeof(struct tcphdr));
 243        if (ret < 0)
 244                return NF_DROP;
 245        if (ret > 0) {
 246                ipv6h = ipv6_hdr(skb);
 247                ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
 248        }
 249        return XT_CONTINUE;
 250}
 251#endif
 252
 253/* Must specify -p tcp --syn */
 254static inline bool find_syn_match(const struct xt_entry_match *m)
 255{
 256        const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data;
 257
 258        if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
 259            tcpinfo->flg_cmp & TCPHDR_SYN &&
 260            !(tcpinfo->invflags & XT_TCP_INV_FLAGS))
 261                return true;
 262
 263        return false;
 264}
 265
 266static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
 267{
 268        const struct xt_tcpmss_info *info = par->targinfo;
 269        const struct ipt_entry *e = par->entryinfo;
 270        const struct xt_entry_match *ematch;
 271
 272        if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
 273            (par->hook_mask & ~((1 << NF_INET_FORWARD) |
 274                           (1 << NF_INET_LOCAL_OUT) |
 275                           (1 << NF_INET_POST_ROUTING))) != 0) {
 276                pr_info("path-MTU clamping only supported in "
 277                        "FORWARD, OUTPUT and POSTROUTING hooks\n");
 278                return -EINVAL;
 279        }
 280        xt_ematch_foreach(ematch, e)
 281                if (find_syn_match(ematch))
 282                        return 0;
 283        pr_info("Only works on TCP SYN packets\n");
 284        return -EINVAL;
 285}
 286
 287#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 288static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
 289{
 290        const struct xt_tcpmss_info *info = par->targinfo;
 291        const struct ip6t_entry *e = par->entryinfo;
 292        const struct xt_entry_match *ematch;
 293
 294        if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
 295            (par->hook_mask & ~((1 << NF_INET_FORWARD) |
 296                           (1 << NF_INET_LOCAL_OUT) |
 297                           (1 << NF_INET_POST_ROUTING))) != 0) {
 298                pr_info("path-MTU clamping only supported in "
 299                        "FORWARD, OUTPUT and POSTROUTING hooks\n");
 300                return -EINVAL;
 301        }
 302        xt_ematch_foreach(ematch, e)
 303                if (find_syn_match(ematch))
 304                        return 0;
 305        pr_info("Only works on TCP SYN packets\n");
 306        return -EINVAL;
 307}
 308#endif
 309
 310static struct xt_target tcpmss_tg_reg[] __read_mostly = {
 311        {
 312                .family         = NFPROTO_IPV4,
 313                .name           = "TCPMSS",
 314                .checkentry     = tcpmss_tg4_check,
 315                .target         = tcpmss_tg4,
 316                .targetsize     = sizeof(struct xt_tcpmss_info),
 317                .proto          = IPPROTO_TCP,
 318                .me             = THIS_MODULE,
 319        },
 320#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 321        {
 322                .family         = NFPROTO_IPV6,
 323                .name           = "TCPMSS",
 324                .checkentry     = tcpmss_tg6_check,
 325                .target         = tcpmss_tg6,
 326                .targetsize     = sizeof(struct xt_tcpmss_info),
 327                .proto          = IPPROTO_TCP,
 328                .me             = THIS_MODULE,
 329        },
 330#endif
 331};
 332
 333static int __init tcpmss_tg_init(void)
 334{
 335        return xt_register_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
 336}
 337
 338static void __exit tcpmss_tg_exit(void)
 339{
 340        xt_unregister_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
 341}
 342
/* Hook the registration and unregistration routines into module load/unload. */
module_init(tcpmss_tg_init);
module_exit(tcpmss_tg_exit);
 345