linux/net/ipv4/netfilter/nf_nat_helper.c
<<
>>
Prefs
   1/* nf_nat_helper.c - generic support functions for NAT helpers
   2 *
   3 * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
   4 * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 as
   8 * published by the Free Software Foundation.
   9 */
  10#include <linux/module.h>
  11#include <linux/kmod.h>
  12#include <linux/types.h>
  13#include <linux/timer.h>
  14#include <linux/skbuff.h>
  15#include <linux/tcp.h>
  16#include <linux/udp.h>
  17#include <net/checksum.h>
  18#include <net/tcp.h>
  19
  20#include <linux/netfilter_ipv4.h>
  21#include <net/netfilter/nf_conntrack.h>
  22#include <net/netfilter/nf_conntrack_helper.h>
  23#include <net/netfilter/nf_conntrack_expect.h>
  24#include <net/netfilter/nf_nat.h>
  25#include <net/netfilter/nf_nat_protocol.h>
  26#include <net/netfilter/nf_nat_core.h>
  27#include <net/netfilter/nf_nat_helper.h>
  28
  29#define DUMP_OFFSET(x) \
  30        pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
  31                 x->offset_before, x->offset_after, x->correction_pos);
  32
  33static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
  34
  35/* Setup TCP sequence correction given this change at this sequence */
  36static inline void
  37adjust_tcp_sequence(u32 seq,
  38                    int sizediff,
  39                    struct nf_conn *ct,
  40                    enum ip_conntrack_info ctinfo)
  41{
  42        int dir;
  43        struct nf_nat_seq *this_way, *other_way;
  44        struct nf_conn_nat *nat = nfct_nat(ct);
  45
  46        pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n",
  47                 ntohl(seq), seq);
  48
  49        dir = CTINFO2DIR(ctinfo);
  50
  51        this_way = &nat->seq[dir];
  52        other_way = &nat->seq[!dir];
  53
  54        pr_debug("nf_nat_resize_packet: Seq_offset before: ");
  55        DUMP_OFFSET(this_way);
  56
  57        spin_lock_bh(&nf_nat_seqofs_lock);
  58
  59        /* SYN adjust. If it's uninitialized, or this is after last
  60         * correction, record it: we don't handle more than one
  61         * adjustment in the window, but do deal with common case of a
  62         * retransmit */
  63        if (this_way->offset_before == this_way->offset_after ||
  64            before(this_way->correction_pos, seq)) {
  65                   this_way->correction_pos = seq;
  66                   this_way->offset_before = this_way->offset_after;
  67                   this_way->offset_after += sizediff;
  68        }
  69        spin_unlock_bh(&nf_nat_seqofs_lock);
  70
  71        pr_debug("nf_nat_resize_packet: Seq_offset after: ");
  72        DUMP_OFFSET(this_way);
  73}
  74
  75/* Frobs data inside this packet, which is linear. */
  76static void mangle_contents(struct sk_buff *skb,
  77                            unsigned int dataoff,
  78                            unsigned int match_offset,
  79                            unsigned int match_len,
  80                            const char *rep_buffer,
  81                            unsigned int rep_len)
  82{
  83        unsigned char *data;
  84
  85        BUG_ON(skb_is_nonlinear(skb));
  86        data = skb_network_header(skb) + dataoff;
  87
  88        /* move post-replacement */
  89        memmove(data + match_offset + rep_len,
  90                data + match_offset + match_len,
  91                skb->tail - (skb->network_header + dataoff +
  92                             match_offset + match_len));
  93
  94        /* insert data from buffer */
  95        memcpy(data + match_offset, rep_buffer, rep_len);
  96
  97        /* update skb info */
  98        if (rep_len > match_len) {
  99                pr_debug("nf_nat_mangle_packet: Extending packet by "
 100                         "%u from %u bytes\n", rep_len - match_len, skb->len);
 101                skb_put(skb, rep_len - match_len);
 102        } else {
 103                pr_debug("nf_nat_mangle_packet: Shrinking packet from "
 104                         "%u from %u bytes\n", match_len - rep_len, skb->len);
 105                __skb_trim(skb, skb->len + rep_len - match_len);
 106        }
 107
 108        /* fix IP hdr checksum information */
 109        ip_hdr(skb)->tot_len = htons(skb->len);
 110        ip_send_check(ip_hdr(skb));
 111}
 112
 113/* Unusual, but possible case. */
 114static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
 115{
 116        if (skb->len + extra > 65535)
 117                return 0;
 118
 119        if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC))
 120                return 0;
 121
 122        return 1;
 123}
 124
 125/* Generic function for mangling variable-length address changes inside
 126 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
 127 * command in FTP).
 128 *
 129 * Takes care about all the nasty sequence number changes, checksumming,
 130 * skb enlargement, ...
 131 *
 132 * */
 133int
 134nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 135                         struct nf_conn *ct,
 136                         enum ip_conntrack_info ctinfo,
 137                         unsigned int match_offset,
 138                         unsigned int match_len,
 139                         const char *rep_buffer,
 140                         unsigned int rep_len)
 141{
 142        struct rtable *rt = (struct rtable *)skb->dst;
 143        struct iphdr *iph;
 144        struct tcphdr *tcph;
 145        int oldlen, datalen;
 146
 147        if (!skb_make_writable(skb, skb->len))
 148                return 0;
 149
 150        if (rep_len > match_len &&
 151            rep_len - match_len > skb_tailroom(skb) &&
 152            !enlarge_skb(skb, rep_len - match_len))
 153                return 0;
 154
 155        SKB_LINEAR_ASSERT(skb);
 156
 157        iph = ip_hdr(skb);
 158        tcph = (void *)iph + iph->ihl*4;
 159
 160        oldlen = skb->len - iph->ihl*4;
 161        mangle_contents(skb, iph->ihl*4 + tcph->doff*4,
 162                        match_offset, match_len, rep_buffer, rep_len);
 163
 164        datalen = skb->len - iph->ihl*4;
 165        if (skb->ip_summed != CHECKSUM_PARTIAL) {
 166                if (!(rt->rt_flags & RTCF_LOCAL) &&
 167                    skb->dev->features & NETIF_F_V4_CSUM) {
 168                        skb->ip_summed = CHECKSUM_PARTIAL;
 169                        skb->csum_start = skb_headroom(skb) +
 170                                          skb_network_offset(skb) +
 171                                          iph->ihl * 4;
 172                        skb->csum_offset = offsetof(struct tcphdr, check);
 173                        tcph->check = ~tcp_v4_check(datalen,
 174                                                    iph->saddr, iph->daddr, 0);
 175                } else {
 176                        tcph->check = 0;
 177                        tcph->check = tcp_v4_check(datalen,
 178                                                   iph->saddr, iph->daddr,
 179                                                   csum_partial(tcph,
 180                                                                datalen, 0));
 181                }
 182        } else
 183                nf_proto_csum_replace2(&tcph->check, skb,
 184                                       htons(oldlen), htons(datalen), 1);
 185
 186        if (rep_len != match_len) {
 187                set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
 188                adjust_tcp_sequence(ntohl(tcph->seq),
 189                                    (int)rep_len - (int)match_len,
 190                                    ct, ctinfo);
 191                /* Tell TCP window tracking about seq change */
 192                nf_conntrack_tcp_update(skb, ip_hdrlen(skb),
 193                                        ct, CTINFO2DIR(ctinfo));
 194        }
 195        return 1;
 196}
 197EXPORT_SYMBOL(nf_nat_mangle_tcp_packet);
 198
 199/* Generic function for mangling variable-length address changes inside
 200 * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
 201 * command in the Amanda protocol)
 202 *
 203 * Takes care about all the nasty sequence number changes, checksumming,
 204 * skb enlargement, ...
 205 *
 206 * XXX - This function could be merged with nf_nat_mangle_tcp_packet which
 207 *       should be fairly easy to do.
 208 */
 209int
 210nf_nat_mangle_udp_packet(struct sk_buff *skb,
 211                         struct nf_conn *ct,
 212                         enum ip_conntrack_info ctinfo,
 213                         unsigned int match_offset,
 214                         unsigned int match_len,
 215                         const char *rep_buffer,
 216                         unsigned int rep_len)
 217{
 218        struct rtable *rt = (struct rtable *)skb->dst;
 219        struct iphdr *iph;
 220        struct udphdr *udph;
 221        int datalen, oldlen;
 222
 223        /* UDP helpers might accidentally mangle the wrong packet */
 224        iph = ip_hdr(skb);
 225        if (skb->len < iph->ihl*4 + sizeof(*udph) +
 226                               match_offset + match_len)
 227                return 0;
 228
 229        if (!skb_make_writable(skb, skb->len))
 230                return 0;
 231
 232        if (rep_len > match_len &&
 233            rep_len - match_len > skb_tailroom(skb) &&
 234            !enlarge_skb(skb, rep_len - match_len))
 235                return 0;
 236
 237        iph = ip_hdr(skb);
 238        udph = (void *)iph + iph->ihl*4;
 239
 240        oldlen = skb->len - iph->ihl*4;
 241        mangle_contents(skb, iph->ihl*4 + sizeof(*udph),
 242                        match_offset, match_len, rep_buffer, rep_len);
 243
 244        /* update the length of the UDP packet */
 245        datalen = skb->len - iph->ihl*4;
 246        udph->len = htons(datalen);
 247
 248        /* fix udp checksum if udp checksum was previously calculated */
 249        if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
 250                return 1;
 251
 252        if (skb->ip_summed != CHECKSUM_PARTIAL) {
 253                if (!(rt->rt_flags & RTCF_LOCAL) &&
 254                    skb->dev->features & NETIF_F_V4_CSUM) {
 255                        skb->ip_summed = CHECKSUM_PARTIAL;
 256                        skb->csum_start = skb_headroom(skb) +
 257                                          skb_network_offset(skb) +
 258                                          iph->ihl * 4;
 259                        skb->csum_offset = offsetof(struct udphdr, check);
 260                        udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
 261                                                         datalen, IPPROTO_UDP,
 262                                                         0);
 263                } else {
 264                        udph->check = 0;
 265                        udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
 266                                                        datalen, IPPROTO_UDP,
 267                                                        csum_partial(udph,
 268                                                                     datalen, 0));
 269                        if (!udph->check)
 270                                udph->check = CSUM_MANGLED_0;
 271                }
 272        } else
 273                nf_proto_csum_replace2(&udph->check, skb,
 274                                       htons(oldlen), htons(datalen), 1);
 275
 276        return 1;
 277}
 278EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
 279
 280/* Adjust one found SACK option including checksum correction */
 281static void
 282sack_adjust(struct sk_buff *skb,
 283            struct tcphdr *tcph,
 284            unsigned int sackoff,
 285            unsigned int sackend,
 286            struct nf_nat_seq *natseq)
 287{
 288        while (sackoff < sackend) {
 289                struct tcp_sack_block_wire *sack;
 290                __be32 new_start_seq, new_end_seq;
 291
 292                sack = (void *)skb->data + sackoff;
 293                if (after(ntohl(sack->start_seq) - natseq->offset_before,
 294                          natseq->correction_pos))
 295                        new_start_seq = htonl(ntohl(sack->start_seq)
 296                                        - natseq->offset_after);
 297                else
 298                        new_start_seq = htonl(ntohl(sack->start_seq)
 299                                        - natseq->offset_before);
 300
 301                if (after(ntohl(sack->end_seq) - natseq->offset_before,
 302                          natseq->correction_pos))
 303                        new_end_seq = htonl(ntohl(sack->end_seq)
 304                                      - natseq->offset_after);
 305                else
 306                        new_end_seq = htonl(ntohl(sack->end_seq)
 307                                      - natseq->offset_before);
 308
 309                pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
 310                         ntohl(sack->start_seq), new_start_seq,
 311                         ntohl(sack->end_seq), new_end_seq);
 312
 313                nf_proto_csum_replace4(&tcph->check, skb,
 314                                       sack->start_seq, new_start_seq, 0);
 315                nf_proto_csum_replace4(&tcph->check, skb,
 316                                       sack->end_seq, new_end_seq, 0);
 317                sack->start_seq = new_start_seq;
 318                sack->end_seq = new_end_seq;
 319                sackoff += sizeof(*sack);
 320        }
 321}
 322
 323/* TCP SACK sequence number adjustment */
 324static inline unsigned int
 325nf_nat_sack_adjust(struct sk_buff *skb,
 326                   struct tcphdr *tcph,
 327                   struct nf_conn *ct,
 328                   enum ip_conntrack_info ctinfo)
 329{
 330        unsigned int dir, optoff, optend;
 331        struct nf_conn_nat *nat = nfct_nat(ct);
 332
 333        optoff = ip_hdrlen(skb) + sizeof(struct tcphdr);
 334        optend = ip_hdrlen(skb) + tcph->doff * 4;
 335
 336        if (!skb_make_writable(skb, optend))
 337                return 0;
 338
 339        dir = CTINFO2DIR(ctinfo);
 340
 341        while (optoff < optend) {
 342                /* Usually: option, length. */
 343                unsigned char *op = skb->data + optoff;
 344
 345                switch (op[0]) {
 346                case TCPOPT_EOL:
 347                        return 1;
 348                case TCPOPT_NOP:
 349                        optoff++;
 350                        continue;
 351                default:
 352                        /* no partial options */
 353                        if (optoff + 1 == optend ||
 354                            optoff + op[1] > optend ||
 355                            op[1] < 2)
 356                                return 0;
 357                        if (op[0] == TCPOPT_SACK &&
 358                            op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
 359                            ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
 360                                sack_adjust(skb, tcph, optoff+2,
 361                                            optoff+op[1], &nat->seq[!dir]);
 362                        optoff += op[1];
 363                }
 364        }
 365        return 1;
 366}
 367
 368/* TCP sequence number adjustment.  Returns 1 on success, 0 on failure */
 369int
 370nf_nat_seq_adjust(struct sk_buff *skb,
 371                  struct nf_conn *ct,
 372                  enum ip_conntrack_info ctinfo)
 373{
 374        struct tcphdr *tcph;
 375        int dir;
 376        __be32 newseq, newack;
 377        struct nf_conn_nat *nat = nfct_nat(ct);
 378        struct nf_nat_seq *this_way, *other_way;
 379
 380        dir = CTINFO2DIR(ctinfo);
 381
 382        this_way = &nat->seq[dir];
 383        other_way = &nat->seq[!dir];
 384
 385        if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
 386                return 0;
 387
 388        tcph = (void *)skb->data + ip_hdrlen(skb);
 389        if (after(ntohl(tcph->seq), this_way->correction_pos))
 390                newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
 391        else
 392                newseq = htonl(ntohl(tcph->seq) + this_way->offset_before);
 393
 394        if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
 395                  other_way->correction_pos))
 396                newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after);
 397        else
 398                newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
 399
 400        nf_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
 401        nf_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
 402
 403        pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
 404                 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
 405                 ntohl(newack));
 406
 407        tcph->seq = newseq;
 408        tcph->ack_seq = newack;
 409
 410        if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo))
 411                return 0;
 412
 413        nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir);
 414
 415        return 1;
 416}
 417EXPORT_SYMBOL(nf_nat_seq_adjust);
 418
 419/* Setup NAT on this expected conntrack so it follows master. */
 420/* If we fail to get a free NAT slot, we'll get dropped on confirm */
 421void nf_nat_follow_master(struct nf_conn *ct,
 422                          struct nf_conntrack_expect *exp)
 423{
 424        struct nf_nat_range range;
 425
 426        /* This must be a fresh one. */
 427        BUG_ON(ct->status & IPS_NAT_DONE_MASK);
 428
 429        /* Change src to where master sends to */
 430        range.flags = IP_NAT_RANGE_MAP_IPS;
 431        range.min_ip = range.max_ip
 432                = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
 433        /* hook doesn't matter, but it has to do source manip */
 434        nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
 435
 436        /* For DST manip, map port here to where it's expected. */
 437        range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
 438        range.min = range.max = exp->saved_proto;
 439        range.min_ip = range.max_ip
 440                = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
 441        /* hook doesn't matter, but it has to do destination manip */
 442        nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
 443}
 444EXPORT_SYMBOL(nf_nat_follow_master);
 445