linux/net/netfilter/nf_nat_helper.c
<<
>>
Prefs
   1/* nf_nat_helper.c - generic support functions for NAT helpers
   2 *
   3 * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
   4 * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 as
   8 * published by the Free Software Foundation.
   9 */
  10#include <linux/module.h>
  11#include <linux/gfp.h>
  12#include <linux/types.h>
  13#include <linux/skbuff.h>
  14#include <linux/tcp.h>
  15#include <linux/udp.h>
  16#include <net/tcp.h>
  17
  18#include <net/netfilter/nf_conntrack.h>
  19#include <net/netfilter/nf_conntrack_helper.h>
  20#include <net/netfilter/nf_conntrack_ecache.h>
  21#include <net/netfilter/nf_conntrack_expect.h>
  22#include <net/netfilter/nf_nat.h>
  23#include <net/netfilter/nf_nat_l3proto.h>
  24#include <net/netfilter/nf_nat_l4proto.h>
  25#include <net/netfilter/nf_nat_core.h>
  26#include <net/netfilter/nf_nat_helper.h>
  27
  28#define DUMP_OFFSET(x) \
  29        pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
  30                 x->offset_before, x->offset_after, x->correction_pos);
  31
  32static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
  33
  34/* Setup TCP sequence correction given this change at this sequence */
  35static inline void
  36adjust_tcp_sequence(u32 seq,
  37                    int sizediff,
  38                    struct nf_conn *ct,
  39                    enum ip_conntrack_info ctinfo)
  40{
  41        enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
  42        struct nf_conn_nat *nat = nfct_nat(ct);
  43        struct nf_nat_seq *this_way = &nat->seq[dir];
  44
  45        pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n",
  46                 seq, sizediff);
  47
  48        pr_debug("adjust_tcp_sequence: Seq_offset before: ");
  49        DUMP_OFFSET(this_way);
  50
  51        spin_lock_bh(&nf_nat_seqofs_lock);
  52
  53        /* SYN adjust. If it's uninitialized, or this is after last
  54         * correction, record it: we don't handle more than one
  55         * adjustment in the window, but do deal with common case of a
  56         * retransmit */
  57        if (this_way->offset_before == this_way->offset_after ||
  58            before(this_way->correction_pos, seq)) {
  59                this_way->correction_pos = seq;
  60                this_way->offset_before = this_way->offset_after;
  61                this_way->offset_after += sizediff;
  62        }
  63        spin_unlock_bh(&nf_nat_seqofs_lock);
  64
  65        pr_debug("adjust_tcp_sequence: Seq_offset after: ");
  66        DUMP_OFFSET(this_way);
  67}
  68
  69/* Get the offset value, for conntrack */
  70s16 nf_nat_get_offset(const struct nf_conn *ct,
  71                      enum ip_conntrack_dir dir,
  72                      u32 seq)
  73{
  74        struct nf_conn_nat *nat = nfct_nat(ct);
  75        struct nf_nat_seq *this_way;
  76        s16 offset;
  77
  78        if (!nat)
  79                return 0;
  80
  81        this_way = &nat->seq[dir];
  82        spin_lock_bh(&nf_nat_seqofs_lock);
  83        offset = after(seq, this_way->correction_pos)
  84                 ? this_way->offset_after : this_way->offset_before;
  85        spin_unlock_bh(&nf_nat_seqofs_lock);
  86
  87        return offset;
  88}
  89
  90/* Frobs data inside this packet, which is linear. */
  91static void mangle_contents(struct sk_buff *skb,
  92                            unsigned int dataoff,
  93                            unsigned int match_offset,
  94                            unsigned int match_len,
  95                            const char *rep_buffer,
  96                            unsigned int rep_len)
  97{
  98        unsigned char *data;
  99
 100        BUG_ON(skb_is_nonlinear(skb));
 101        data = skb_network_header(skb) + dataoff;
 102
 103        /* move post-replacement */
 104        memmove(data + match_offset + rep_len,
 105                data + match_offset + match_len,
 106                skb->tail - (skb->network_header + dataoff +
 107                             match_offset + match_len));
 108
 109        /* insert data from buffer */
 110        memcpy(data + match_offset, rep_buffer, rep_len);
 111
 112        /* update skb info */
 113        if (rep_len > match_len) {
 114                pr_debug("nf_nat_mangle_packet: Extending packet by "
 115                         "%u from %u bytes\n", rep_len - match_len, skb->len);
 116                skb_put(skb, rep_len - match_len);
 117        } else {
 118                pr_debug("nf_nat_mangle_packet: Shrinking packet from "
 119                         "%u from %u bytes\n", match_len - rep_len, skb->len);
 120                __skb_trim(skb, skb->len + rep_len - match_len);
 121        }
 122
 123        if (nf_ct_l3num((struct nf_conn *)skb->nfct) == NFPROTO_IPV4) {
 124                /* fix IP hdr checksum information */
 125                ip_hdr(skb)->tot_len = htons(skb->len);
 126                ip_send_check(ip_hdr(skb));
 127        } else
 128                ipv6_hdr(skb)->payload_len =
 129                        htons(skb->len - sizeof(struct ipv6hdr));
 130}
 131
 132/* Unusual, but possible case. */
 133static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
 134{
 135        if (skb->len + extra > 65535)
 136                return 0;
 137
 138        if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC))
 139                return 0;
 140
 141        return 1;
 142}
 143
 144void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
 145                           __be32 seq, s16 off)
 146{
 147        if (!off)
 148                return;
 149        set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
 150        adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo);
 151        nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
 152}
 153EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
 154
 155void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
 156                           u32 ctinfo, int off)
 157{
 158        const struct tcphdr *th;
 159
 160        if (nf_ct_protonum(ct) != IPPROTO_TCP)
 161                return;
 162
 163        th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb));
 164        nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
 165}
 166EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust);
 167
 168/* Generic function for mangling variable-length address changes inside
 169 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
 170 * command in FTP).
 171 *
 172 * Takes care about all the nasty sequence number changes, checksumming,
 173 * skb enlargement, ...
 174 *
 175 * */
 176int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 177                               struct nf_conn *ct,
 178                               enum ip_conntrack_info ctinfo,
 179                               unsigned int protoff,
 180                               unsigned int match_offset,
 181                               unsigned int match_len,
 182                               const char *rep_buffer,
 183                               unsigned int rep_len, bool adjust)
 184{
 185        const struct nf_nat_l3proto *l3proto;
 186        struct tcphdr *tcph;
 187        int oldlen, datalen;
 188
 189        if (!skb_make_writable(skb, skb->len))
 190                return 0;
 191
 192        if (rep_len > match_len &&
 193            rep_len - match_len > skb_tailroom(skb) &&
 194            !enlarge_skb(skb, rep_len - match_len))
 195                return 0;
 196
 197        SKB_LINEAR_ASSERT(skb);
 198
 199        tcph = (void *)skb->data + protoff;
 200
 201        oldlen = skb->len - protoff;
 202        mangle_contents(skb, protoff + tcph->doff*4,
 203                        match_offset, match_len, rep_buffer, rep_len);
 204
 205        datalen = skb->len - protoff;
 206
 207        l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
 208        l3proto->csum_recalc(skb, IPPROTO_TCP, tcph, &tcph->check,
 209                             datalen, oldlen);
 210
 211        if (adjust && rep_len != match_len)
 212                nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq,
 213                                      (int)rep_len - (int)match_len);
 214
 215        return 1;
 216}
 217EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet);
 218
 219/* Generic function for mangling variable-length address changes inside
 220 * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
 221 * command in the Amanda protocol)
 222 *
 223 * Takes care about all the nasty sequence number changes, checksumming,
 224 * skb enlargement, ...
 225 *
 226 * XXX - This function could be merged with nf_nat_mangle_tcp_packet which
 227 *       should be fairly easy to do.
 228 */
 229int
 230nf_nat_mangle_udp_packet(struct sk_buff *skb,
 231                         struct nf_conn *ct,
 232                         enum ip_conntrack_info ctinfo,
 233                         unsigned int protoff,
 234                         unsigned int match_offset,
 235                         unsigned int match_len,
 236                         const char *rep_buffer,
 237                         unsigned int rep_len)
 238{
 239        const struct nf_nat_l3proto *l3proto;
 240        struct udphdr *udph;
 241        int datalen, oldlen;
 242
 243        if (!skb_make_writable(skb, skb->len))
 244                return 0;
 245
 246        if (rep_len > match_len &&
 247            rep_len - match_len > skb_tailroom(skb) &&
 248            !enlarge_skb(skb, rep_len - match_len))
 249                return 0;
 250
 251        udph = (void *)skb->data + protoff;
 252
 253        oldlen = skb->len - protoff;
 254        mangle_contents(skb, protoff + sizeof(*udph),
 255                        match_offset, match_len, rep_buffer, rep_len);
 256
 257        /* update the length of the UDP packet */
 258        datalen = skb->len - protoff;
 259        udph->len = htons(datalen);
 260
 261        /* fix udp checksum if udp checksum was previously calculated */
 262        if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
 263                return 1;
 264
 265        l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
 266        l3proto->csum_recalc(skb, IPPROTO_UDP, udph, &udph->check,
 267                             datalen, oldlen);
 268
 269        return 1;
 270}
 271EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
 272
 273/* Adjust one found SACK option including checksum correction */
 274static void
 275sack_adjust(struct sk_buff *skb,
 276            struct tcphdr *tcph,
 277            unsigned int sackoff,
 278            unsigned int sackend,
 279            struct nf_nat_seq *natseq)
 280{
 281        while (sackoff < sackend) {
 282                struct tcp_sack_block_wire *sack;
 283                __be32 new_start_seq, new_end_seq;
 284
 285                sack = (void *)skb->data + sackoff;
 286                if (after(ntohl(sack->start_seq) - natseq->offset_before,
 287                          natseq->correction_pos))
 288                        new_start_seq = htonl(ntohl(sack->start_seq)
 289                                        - natseq->offset_after);
 290                else
 291                        new_start_seq = htonl(ntohl(sack->start_seq)
 292                                        - natseq->offset_before);
 293
 294                if (after(ntohl(sack->end_seq) - natseq->offset_before,
 295                          natseq->correction_pos))
 296                        new_end_seq = htonl(ntohl(sack->end_seq)
 297                                      - natseq->offset_after);
 298                else
 299                        new_end_seq = htonl(ntohl(sack->end_seq)
 300                                      - natseq->offset_before);
 301
 302                pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
 303                         ntohl(sack->start_seq), new_start_seq,
 304                         ntohl(sack->end_seq), new_end_seq);
 305
 306                inet_proto_csum_replace4(&tcph->check, skb,
 307                                         sack->start_seq, new_start_seq, 0);
 308                inet_proto_csum_replace4(&tcph->check, skb,
 309                                         sack->end_seq, new_end_seq, 0);
 310                sack->start_seq = new_start_seq;
 311                sack->end_seq = new_end_seq;
 312                sackoff += sizeof(*sack);
 313        }
 314}
 315
 316/* TCP SACK sequence number adjustment */
 317static inline unsigned int
 318nf_nat_sack_adjust(struct sk_buff *skb,
 319                   unsigned int protoff,
 320                   struct tcphdr *tcph,
 321                   struct nf_conn *ct,
 322                   enum ip_conntrack_info ctinfo)
 323{
 324        unsigned int dir, optoff, optend;
 325        struct nf_conn_nat *nat = nfct_nat(ct);
 326
 327        optoff = protoff + sizeof(struct tcphdr);
 328        optend = protoff + tcph->doff * 4;
 329
 330        if (!skb_make_writable(skb, optend))
 331                return 0;
 332
 333        dir = CTINFO2DIR(ctinfo);
 334
 335        while (optoff < optend) {
 336                /* Usually: option, length. */
 337                unsigned char *op = skb->data + optoff;
 338
 339                switch (op[0]) {
 340                case TCPOPT_EOL:
 341                        return 1;
 342                case TCPOPT_NOP:
 343                        optoff++;
 344                        continue;
 345                default:
 346                        /* no partial options */
 347                        if (optoff + 1 == optend ||
 348                            optoff + op[1] > optend ||
 349                            op[1] < 2)
 350                                return 0;
 351                        if (op[0] == TCPOPT_SACK &&
 352                            op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
 353                            ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
 354                                sack_adjust(skb, tcph, optoff+2,
 355                                            optoff+op[1], &nat->seq[!dir]);
 356                        optoff += op[1];
 357                }
 358        }
 359        return 1;
 360}
 361
 362/* TCP sequence number adjustment.  Returns 1 on success, 0 on failure */
 363int
 364nf_nat_seq_adjust(struct sk_buff *skb,
 365                  struct nf_conn *ct,
 366                  enum ip_conntrack_info ctinfo,
 367                  unsigned int protoff)
 368{
 369        struct tcphdr *tcph;
 370        int dir;
 371        __be32 newseq, newack;
 372        s16 seqoff, ackoff;
 373        struct nf_conn_nat *nat = nfct_nat(ct);
 374        struct nf_nat_seq *this_way, *other_way;
 375
 376        dir = CTINFO2DIR(ctinfo);
 377
 378        this_way = &nat->seq[dir];
 379        other_way = &nat->seq[!dir];
 380
 381        if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
 382                return 0;
 383
 384        tcph = (void *)skb->data + protoff;
 385        if (after(ntohl(tcph->seq), this_way->correction_pos))
 386                seqoff = this_way->offset_after;
 387        else
 388                seqoff = this_way->offset_before;
 389
 390        if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
 391                  other_way->correction_pos))
 392                ackoff = other_way->offset_after;
 393        else
 394                ackoff = other_way->offset_before;
 395
 396        newseq = htonl(ntohl(tcph->seq) + seqoff);
 397        newack = htonl(ntohl(tcph->ack_seq) - ackoff);
 398
 399        inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
 400        inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
 401
 402        pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
 403                 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
 404                 ntohl(newack));
 405
 406        tcph->seq = newseq;
 407        tcph->ack_seq = newack;
 408
 409        return nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo);
 410}
 411
 412/* Setup NAT on this expected conntrack so it follows master. */
 413/* If we fail to get a free NAT slot, we'll get dropped on confirm */
 414void nf_nat_follow_master(struct nf_conn *ct,
 415                          struct nf_conntrack_expect *exp)
 416{
 417        struct nf_nat_range range;
 418
 419        /* This must be a fresh one. */
 420        BUG_ON(ct->status & IPS_NAT_DONE_MASK);
 421
 422        /* Change src to where master sends to */
 423        range.flags = NF_NAT_RANGE_MAP_IPS;
 424        range.min_addr = range.max_addr
 425                = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
 426        nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 427
 428        /* For DST manip, map port here to where it's expected. */
 429        range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
 430        range.min_proto = range.max_proto = exp->saved_proto;
 431        range.min_addr = range.max_addr
 432                = ct->master->tuplehash[!exp->dir].tuple.src.u3;
 433        nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
 434}
 435EXPORT_SYMBOL(nf_nat_follow_master);
 436