linux/net/netfilter/nf_nat_helper.c
<<
>>
Prefs
   1/* nf_nat_helper.c - generic support functions for NAT helpers
   2 *
   3 * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
   4 * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
   5 * (C) 2007-2012 Patrick McHardy <kaber@trash.net>
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License version 2 as
   9 * published by the Free Software Foundation.
  10 */
  11#include <linux/module.h>
  12#include <linux/gfp.h>
  13#include <linux/types.h>
  14#include <linux/skbuff.h>
  15#include <linux/tcp.h>
  16#include <linux/udp.h>
  17#include <net/tcp.h>
  18
  19#include <net/netfilter/nf_conntrack.h>
  20#include <net/netfilter/nf_conntrack_helper.h>
  21#include <net/netfilter/nf_conntrack_ecache.h>
  22#include <net/netfilter/nf_conntrack_expect.h>
  23#include <net/netfilter/nf_nat.h>
  24#include <net/netfilter/nf_nat_l3proto.h>
  25#include <net/netfilter/nf_nat_l4proto.h>
  26#include <net/netfilter/nf_nat_core.h>
  27#include <net/netfilter/nf_nat_helper.h>
  28
  29#define DUMP_OFFSET(x) \
  30        pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
  31                 x->offset_before, x->offset_after, x->correction_pos);
  32
/* Taken (with bottom halves disabled) by adjust_tcp_sequence() while it
 * updates a struct nf_nat_seq and by nf_nat_get_offset() while it reads one. */
  33static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
  34
  35/* Setup TCP sequence correction given this change at this sequence */
  36static inline void
  37adjust_tcp_sequence(u32 seq,
  38                    int sizediff,
  39                    struct nf_conn *ct,
  40                    enum ip_conntrack_info ctinfo)
  41{
  42        enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
  43        struct nf_conn_nat *nat = nfct_nat(ct);
  44        struct nf_nat_seq *this_way = &nat->seq[dir];
  45
  46        pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n",
  47                 seq, sizediff);
  48
  49        pr_debug("adjust_tcp_sequence: Seq_offset before: ");
  50        DUMP_OFFSET(this_way);
  51
  52        spin_lock_bh(&nf_nat_seqofs_lock);
  53
  54        /* SYN adjust. If it's uninitialized, or this is after last
  55         * correction, record it: we don't handle more than one
  56         * adjustment in the window, but do deal with common case of a
  57         * retransmit */
  58        if (this_way->offset_before == this_way->offset_after ||
  59            before(this_way->correction_pos, seq)) {
  60                this_way->correction_pos = seq;
  61                this_way->offset_before = this_way->offset_after;
  62                this_way->offset_after += sizediff;
  63        }
  64        spin_unlock_bh(&nf_nat_seqofs_lock);
  65
  66        pr_debug("adjust_tcp_sequence: Seq_offset after: ");
  67        DUMP_OFFSET(this_way);
  68}
  69
  70/* Get the offset value, for conntrack */
  71s16 nf_nat_get_offset(const struct nf_conn *ct,
  72                      enum ip_conntrack_dir dir,
  73                      u32 seq)
  74{
  75        struct nf_conn_nat *nat = nfct_nat(ct);
  76        struct nf_nat_seq *this_way;
  77        s16 offset;
  78
  79        if (!nat)
  80                return 0;
  81
  82        this_way = &nat->seq[dir];
  83        spin_lock_bh(&nf_nat_seqofs_lock);
  84        offset = after(seq, this_way->correction_pos)
  85                 ? this_way->offset_after : this_way->offset_before;
  86        spin_unlock_bh(&nf_nat_seqofs_lock);
  87
  88        return offset;
  89}
  90
  91/* Frobs data inside this packet, which is linear. */
  92static void mangle_contents(struct sk_buff *skb,
  93                            unsigned int dataoff,
  94                            unsigned int match_offset,
  95                            unsigned int match_len,
  96                            const char *rep_buffer,
  97                            unsigned int rep_len)
  98{
  99        unsigned char *data;
 100
 101        BUG_ON(skb_is_nonlinear(skb));
 102        data = skb_network_header(skb) + dataoff;
 103
 104        /* move post-replacement */
 105        memmove(data + match_offset + rep_len,
 106                data + match_offset + match_len,
 107                skb->tail - (skb->network_header + dataoff +
 108                             match_offset + match_len));
 109
 110        /* insert data from buffer */
 111        memcpy(data + match_offset, rep_buffer, rep_len);
 112
 113        /* update skb info */
 114        if (rep_len > match_len) {
 115                pr_debug("nf_nat_mangle_packet: Extending packet by "
 116                         "%u from %u bytes\n", rep_len - match_len, skb->len);
 117                skb_put(skb, rep_len - match_len);
 118        } else {
 119                pr_debug("nf_nat_mangle_packet: Shrinking packet from "
 120                         "%u from %u bytes\n", match_len - rep_len, skb->len);
 121                __skb_trim(skb, skb->len + rep_len - match_len);
 122        }
 123
 124        if (nf_ct_l3num((struct nf_conn *)skb->nfct) == NFPROTO_IPV4) {
 125                /* fix IP hdr checksum information */
 126                ip_hdr(skb)->tot_len = htons(skb->len);
 127                ip_send_check(ip_hdr(skb));
 128        } else
 129                ipv6_hdr(skb)->payload_len =
 130                        htons(skb->len - sizeof(struct ipv6hdr));
 131}
 132
 133/* Unusual, but possible case. */
 134static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
 135{
 136        if (skb->len + extra > 65535)
 137                return 0;
 138
 139        if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC))
 140                return 0;
 141
 142        return 1;
 143}
 144
 145void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
 146                           __be32 seq, s16 off)
 147{
 148        if (!off)
 149                return;
 150        set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
 151        adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo);
 152        nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
 153}
 154EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
 155
 156void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
 157                           u32 ctinfo, int off)
 158{
 159        const struct tcphdr *th;
 160
 161        if (nf_ct_protonum(ct) != IPPROTO_TCP)
 162                return;
 163
 164        th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb));
 165        nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
 166}
 167EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust);
 168
 169/* Generic function for mangling variable-length address changes inside
 170 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
 171 * command in FTP).
 172 *
 173 * Takes care about all the nasty sequence number changes, checksumming,
 174 * skb enlargement, ...
 175 *
 176 * */
 177int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 178                               struct nf_conn *ct,
 179                               enum ip_conntrack_info ctinfo,
 180                               unsigned int protoff,
 181                               unsigned int match_offset,
 182                               unsigned int match_len,
 183                               const char *rep_buffer,
 184                               unsigned int rep_len, bool adjust)
 185{
 186        const struct nf_nat_l3proto *l3proto;
 187        struct tcphdr *tcph;
 188        int oldlen, datalen;
 189
 190        if (!skb_make_writable(skb, skb->len))
 191                return 0;
 192
 193        if (rep_len > match_len &&
 194            rep_len - match_len > skb_tailroom(skb) &&
 195            !enlarge_skb(skb, rep_len - match_len))
 196                return 0;
 197
 198        SKB_LINEAR_ASSERT(skb);
 199
 200        tcph = (void *)skb->data + protoff;
 201
 202        oldlen = skb->len - protoff;
 203        mangle_contents(skb, protoff + tcph->doff*4,
 204                        match_offset, match_len, rep_buffer, rep_len);
 205
 206        datalen = skb->len - protoff;
 207
 208        l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
 209        l3proto->csum_recalc(skb, IPPROTO_TCP, tcph, &tcph->check,
 210                             datalen, oldlen);
 211
 212        if (adjust && rep_len != match_len)
 213                nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq,
 214                                      (int)rep_len - (int)match_len);
 215
 216        return 1;
 217}
 218EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet);
 219
 220/* Generic function for mangling variable-length address changes inside
 221 * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
 222 * command in the Amanda protocol)
 223 *
 224 * Takes care about all the nasty sequence number changes, checksumming,
 225 * skb enlargement, ...
 226 *
 227 * XXX - This function could be merged with nf_nat_mangle_tcp_packet which
 228 *       should be fairly easy to do.
 229 */
 230int
 231nf_nat_mangle_udp_packet(struct sk_buff *skb,
 232                         struct nf_conn *ct,
 233                         enum ip_conntrack_info ctinfo,
 234                         unsigned int protoff,
 235                         unsigned int match_offset,
 236                         unsigned int match_len,
 237                         const char *rep_buffer,
 238                         unsigned int rep_len)
 239{
 240        const struct nf_nat_l3proto *l3proto;
 241        struct udphdr *udph;
 242        int datalen, oldlen;
 243
 244        if (!skb_make_writable(skb, skb->len))
 245                return 0;
 246
 247        if (rep_len > match_len &&
 248            rep_len - match_len > skb_tailroom(skb) &&
 249            !enlarge_skb(skb, rep_len - match_len))
 250                return 0;
 251
 252        udph = (void *)skb->data + protoff;
 253
 254        oldlen = skb->len - protoff;
 255        mangle_contents(skb, protoff + sizeof(*udph),
 256                        match_offset, match_len, rep_buffer, rep_len);
 257
 258        /* update the length of the UDP packet */
 259        datalen = skb->len - protoff;
 260        udph->len = htons(datalen);
 261
 262        /* fix udp checksum if udp checksum was previously calculated */
 263        if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
 264                return 1;
 265
 266        l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
 267        l3proto->csum_recalc(skb, IPPROTO_UDP, udph, &udph->check,
 268                             datalen, oldlen);
 269
 270        return 1;
 271}
 272EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
 273
 274/* Adjust one found SACK option including checksum correction */
 275static void
 276sack_adjust(struct sk_buff *skb,
 277            struct tcphdr *tcph,
 278            unsigned int sackoff,
 279            unsigned int sackend,
 280            struct nf_nat_seq *natseq)
 281{
 282        while (sackoff < sackend) {
 283                struct tcp_sack_block_wire *sack;
 284                __be32 new_start_seq, new_end_seq;
 285
 286                sack = (void *)skb->data + sackoff;
 287                if (after(ntohl(sack->start_seq) - natseq->offset_before,
 288                          natseq->correction_pos))
 289                        new_start_seq = htonl(ntohl(sack->start_seq)
 290                                        - natseq->offset_after);
 291                else
 292                        new_start_seq = htonl(ntohl(sack->start_seq)
 293                                        - natseq->offset_before);
 294
 295                if (after(ntohl(sack->end_seq) - natseq->offset_before,
 296                          natseq->correction_pos))
 297                        new_end_seq = htonl(ntohl(sack->end_seq)
 298                                      - natseq->offset_after);
 299                else
 300                        new_end_seq = htonl(ntohl(sack->end_seq)
 301                                      - natseq->offset_before);
 302
 303                pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
 304                         ntohl(sack->start_seq), new_start_seq,
 305                         ntohl(sack->end_seq), new_end_seq);
 306
 307                inet_proto_csum_replace4(&tcph->check, skb,
 308                                         sack->start_seq, new_start_seq, 0);
 309                inet_proto_csum_replace4(&tcph->check, skb,
 310                                         sack->end_seq, new_end_seq, 0);
 311                sack->start_seq = new_start_seq;
 312                sack->end_seq = new_end_seq;
 313                sackoff += sizeof(*sack);
 314        }
 315}
 316
 317/* TCP SACK sequence number adjustment */
 318static inline unsigned int
 319nf_nat_sack_adjust(struct sk_buff *skb,
 320                   unsigned int protoff,
 321                   struct tcphdr *tcph,
 322                   struct nf_conn *ct,
 323                   enum ip_conntrack_info ctinfo)
 324{
 325        unsigned int dir, optoff, optend;
 326        struct nf_conn_nat *nat = nfct_nat(ct);
 327
 328        optoff = protoff + sizeof(struct tcphdr);
 329        optend = protoff + tcph->doff * 4;
 330
 331        if (!skb_make_writable(skb, optend))
 332                return 0;
 333
 334        dir = CTINFO2DIR(ctinfo);
 335
 336        while (optoff < optend) {
 337                /* Usually: option, length. */
 338                unsigned char *op = skb->data + optoff;
 339
 340                switch (op[0]) {
 341                case TCPOPT_EOL:
 342                        return 1;
 343                case TCPOPT_NOP:
 344                        optoff++;
 345                        continue;
 346                default:
 347                        /* no partial options */
 348                        if (optoff + 1 == optend ||
 349                            optoff + op[1] > optend ||
 350                            op[1] < 2)
 351                                return 0;
 352                        if (op[0] == TCPOPT_SACK &&
 353                            op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
 354                            ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
 355                                sack_adjust(skb, tcph, optoff+2,
 356                                            optoff+op[1], &nat->seq[!dir]);
 357                        optoff += op[1];
 358                }
 359        }
 360        return 1;
 361}
 362
 363/* TCP sequence number adjustment.  Returns 1 on success, 0 on failure */
 364int
 365nf_nat_seq_adjust(struct sk_buff *skb,
 366                  struct nf_conn *ct,
 367                  enum ip_conntrack_info ctinfo,
 368                  unsigned int protoff)
 369{
 370        struct tcphdr *tcph;
 371        int dir;
 372        __be32 newseq, newack;
 373        s16 seqoff, ackoff;
 374        struct nf_conn_nat *nat = nfct_nat(ct);
 375        struct nf_nat_seq *this_way, *other_way;
 376
 377        dir = CTINFO2DIR(ctinfo);
 378
 379        this_way = &nat->seq[dir];
 380        other_way = &nat->seq[!dir];
 381
 382        if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
 383                return 0;
 384
 385        tcph = (void *)skb->data + protoff;
 386        if (after(ntohl(tcph->seq), this_way->correction_pos))
 387                seqoff = this_way->offset_after;
 388        else
 389                seqoff = this_way->offset_before;
 390
 391        if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
 392                  other_way->correction_pos))
 393                ackoff = other_way->offset_after;
 394        else
 395                ackoff = other_way->offset_before;
 396
 397        newseq = htonl(ntohl(tcph->seq) + seqoff);
 398        newack = htonl(ntohl(tcph->ack_seq) - ackoff);
 399
 400        inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
 401        inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
 402
 403        pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
 404                 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
 405                 ntohl(newack));
 406
 407        tcph->seq = newseq;
 408        tcph->ack_seq = newack;
 409
 410        return nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo);
 411}
 412
 413/* Setup NAT on this expected conntrack so it follows master. */
 414/* If we fail to get a free NAT slot, we'll get dropped on confirm */
 415void nf_nat_follow_master(struct nf_conn *ct,
 416                          struct nf_conntrack_expect *exp)
 417{
 418        struct nf_nat_range range;
 419
 420        /* This must be a fresh one. */
 421        BUG_ON(ct->status & IPS_NAT_DONE_MASK);
 422
 423        /* Change src to where master sends to */
 424        range.flags = NF_NAT_RANGE_MAP_IPS;
 425        range.min_addr = range.max_addr
 426                = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
 427        nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 428
 429        /* For DST manip, map port here to where it's expected. */
 430        range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
 431        range.min_proto = range.max_proto = exp->saved_proto;
 432        range.min_addr = range.max_addr
 433                = ct->master->tuplehash[!exp->dir].tuple.src.u3;
 434        nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
 435}
 436EXPORT_SYMBOL(nf_nat_follow_master);
 437