linux/net/netfilter/nf_conntrack_proto_tcp.c
<<
>>
Prefs
   1/* (C) 1999-2001 Paul `Rusty' Russell
   2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License version 2 as
   6 * published by the Free Software Foundation.
   7 */
   8
   9#include <linux/types.h>
  10#include <linux/timer.h>
  11#include <linux/module.h>
  12#include <linux/in.h>
  13#include <linux/tcp.h>
  14#include <linux/spinlock.h>
  15#include <linux/skbuff.h>
  16#include <linux/ipv6.h>
  17#include <net/ip6_checksum.h>
  18#include <asm/unaligned.h>
  19
  20#include <net/tcp.h>
  21
  22#include <linux/netfilter.h>
  23#include <linux/netfilter_ipv4.h>
  24#include <linux/netfilter_ipv6.h>
  25#include <net/netfilter/nf_conntrack.h>
  26#include <net/netfilter/nf_conntrack_l4proto.h>
  27#include <net/netfilter/nf_conntrack_ecache.h>
  28#include <net/netfilter/nf_log.h>
  29#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
  30#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
  31
  32/* "Be conservative in what you do,
  33    be liberal in what you accept from others."
  34    If it's non-zero, we mark only out of window RST segments as INVALID. */
  35static int nf_ct_tcp_be_liberal __read_mostly = 0;
  36
  37/* If it is set to zero, we disable picking up already established
  38   connections. */
  39static int nf_ct_tcp_loose __read_mostly = 1;
  40
  41/* Max number of the retransmitted packets without receiving an (acceptable)
  42   ACK from the destination. If this number is reached, a shorter timer
  43   will be started. */
  44static int nf_ct_tcp_max_retrans __read_mostly = 3;
  45
  46  /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
  47     closely.  They're more complex. --RR */
  48
  49static const char *const tcp_conntrack_names[] = {
  50        "NONE",
  51        "SYN_SENT",
  52        "SYN_RECV",
  53        "ESTABLISHED",
  54        "FIN_WAIT",
  55        "CLOSE_WAIT",
  56        "LAST_ACK",
  57        "TIME_WAIT",
  58        "CLOSE",
  59        "SYN_SENT2",
  60};
  61
  62#define SECS * HZ
  63#define MINS * 60 SECS
  64#define HOURS * 60 MINS
  65#define DAYS * 24 HOURS
  66
  67static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
  68        [TCP_CONNTRACK_SYN_SENT]        = 2 MINS,
  69        [TCP_CONNTRACK_SYN_RECV]        = 60 SECS,
  70        [TCP_CONNTRACK_ESTABLISHED]     = 5 DAYS,
  71        [TCP_CONNTRACK_FIN_WAIT]        = 2 MINS,
  72        [TCP_CONNTRACK_CLOSE_WAIT]      = 60 SECS,
  73        [TCP_CONNTRACK_LAST_ACK]        = 30 SECS,
  74        [TCP_CONNTRACK_TIME_WAIT]       = 2 MINS,
  75        [TCP_CONNTRACK_CLOSE]           = 10 SECS,
  76        [TCP_CONNTRACK_SYN_SENT2]       = 2 MINS,
  77/* RFC1122 says the R2 limit should be at least 100 seconds.
  78   Linux uses 15 packets as limit, which corresponds
  79   to ~13-30min depending on RTO. */
  80        [TCP_CONNTRACK_RETRANS]         = 5 MINS,
  81        [TCP_CONNTRACK_UNACK]           = 5 MINS,
  82};
  83
  84#define sNO TCP_CONNTRACK_NONE
  85#define sSS TCP_CONNTRACK_SYN_SENT
  86#define sSR TCP_CONNTRACK_SYN_RECV
  87#define sES TCP_CONNTRACK_ESTABLISHED
  88#define sFW TCP_CONNTRACK_FIN_WAIT
  89#define sCW TCP_CONNTRACK_CLOSE_WAIT
  90#define sLA TCP_CONNTRACK_LAST_ACK
  91#define sTW TCP_CONNTRACK_TIME_WAIT
  92#define sCL TCP_CONNTRACK_CLOSE
  93#define sS2 TCP_CONNTRACK_SYN_SENT2
  94#define sIV TCP_CONNTRACK_MAX
  95#define sIG TCP_CONNTRACK_IGNORE
  96
  97/* What TCP flags are set from RST/SYN/FIN/ACK. */
  98enum tcp_bit_set {
  99        TCP_SYN_SET,
 100        TCP_SYNACK_SET,
 101        TCP_FIN_SET,
 102        TCP_ACK_SET,
 103        TCP_RST_SET,
 104        TCP_NONE_SET,
 105};
 106
/*
 * The TCP state transition table needs a few words...
 *
 * We are the man in the middle. All the packets go through us
 * but might get lost in transit to the destination.
 * It is assumed that the destinations can't receive segments
 * we haven't seen.
 *
 * The checked segment is in window, but our windows are *not*
 * equivalent with the ones of the sender/receiver. We always
 * try to guess the state of the current sender.
 *
 * The meanings of the states are:
 *
 * NONE:        initial state
 * SYN_SENT:    SYN-only packet seen
 * SYN_SENT2:   SYN-only packet seen from reply dir, simultaneous open
 * SYN_RECV:    SYN-ACK packet seen
 * ESTABLISHED: ACK packet seen
 * FIN_WAIT:    FIN packet seen
 * CLOSE_WAIT:  ACK seen (after FIN)
 * LAST_ACK:    FIN seen (after FIN)
 * TIME_WAIT:   last ACK seen
 * CLOSE:       closed connection (RST)
 *
 * Packets marked as IGNORED (sIG):
 *      if they may be either invalid or valid
 *      and the receiver may send back a connection
 *      closing RST or a SYN/ACK.
 *
 * Packets marked as INVALID (sIV):
 *      if we regard them as truly invalid packets
 */
 140static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
 141        {
 142/* ORIGINAL */
 143/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 144/*syn*/    { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
 145/*
 146 *      sNO -> sSS      Initialize a new connection
 147 *      sSS -> sSS      Retransmitted SYN
 148 *      sS2 -> sS2      Late retransmitted SYN
 149 *      sSR -> sIG
 150 *      sES -> sIG      Error: SYNs in window outside the SYN_SENT state
 151 *                      are errors. Receiver will reply with RST
 152 *                      and close the connection.
 153 *                      Or we are not in sync and hold a dead connection.
 154 *      sFW -> sIG
 155 *      sCW -> sIG
 156 *      sLA -> sIG
 157 *      sTW -> sSS      Reopened connection (RFC 1122).
 158 *      sCL -> sSS
 159 */
 160/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 161/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
 162/*
 163 *      sNO -> sIV      Too late and no reason to do anything
 164 *      sSS -> sIV      Client can't send SYN and then SYN/ACK
 165 *      sS2 -> sSR      SYN/ACK sent to SYN2 in simultaneous open
 166 *      sSR -> sSR      Late retransmitted SYN/ACK in simultaneous open
 167 *      sES -> sIV      Invalid SYN/ACK packets sent by the client
 168 *      sFW -> sIV
 169 *      sCW -> sIV
 170 *      sLA -> sIV
 171 *      sTW -> sIV
 172 *      sCL -> sIV
 173 */
 174/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 175/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
 176/*
 177 *      sNO -> sIV      Too late and no reason to do anything...
 178 *      sSS -> sIV      Client migth not send FIN in this state:
 179 *                      we enforce waiting for a SYN/ACK reply first.
 180 *      sS2 -> sIV
 181 *      sSR -> sFW      Close started.
 182 *      sES -> sFW
 183 *      sFW -> sLA      FIN seen in both directions, waiting for
 184 *                      the last ACK.
 185 *                      Migth be a retransmitted FIN as well...
 186 *      sCW -> sLA
 187 *      sLA -> sLA      Retransmitted FIN. Remain in the same state.
 188 *      sTW -> sTW
 189 *      sCL -> sCL
 190 */
 191/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 192/*ack*/    { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
 193/*
 194 *      sNO -> sES      Assumed.
 195 *      sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
 196 *      sS2 -> sIV
 197 *      sSR -> sES      Established state is reached.
 198 *      sES -> sES      :-)
 199 *      sFW -> sCW      Normal close request answered by ACK.
 200 *      sCW -> sCW
 201 *      sLA -> sTW      Last ACK detected.
 202 *      sTW -> sTW      Retransmitted last ACK. Remain in the same state.
 203 *      sCL -> sCL
 204 */
 205/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 206/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
 207/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
 208        },
 209        {
 210/* REPLY */
 211/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 212/*syn*/    { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
 213/*
 214 *      sNO -> sIV      Never reached.
 215 *      sSS -> sS2      Simultaneous open
 216 *      sS2 -> sS2      Retransmitted simultaneous SYN
 217 *      sSR -> sIV      Invalid SYN packets sent by the server
 218 *      sES -> sIV
 219 *      sFW -> sIV
 220 *      sCW -> sIV
 221 *      sLA -> sIV
 222 *      sTW -> sIV      Reopened connection, but server may not do it.
 223 *      sCL -> sIV
 224 */
 225/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 226/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
 227/*
 228 *      sSS -> sSR      Standard open.
 229 *      sS2 -> sSR      Simultaneous open
 230 *      sSR -> sIG      Retransmitted SYN/ACK, ignore it.
 231 *      sES -> sIG      Late retransmitted SYN/ACK?
 232 *      sFW -> sIG      Might be SYN/ACK answering ignored SYN
 233 *      sCW -> sIG
 234 *      sLA -> sIG
 235 *      sTW -> sIG
 236 *      sCL -> sIG
 237 */
 238/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 239/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
 240/*
 241 *      sSS -> sIV      Server might not send FIN in this state.
 242 *      sS2 -> sIV
 243 *      sSR -> sFW      Close started.
 244 *      sES -> sFW
 245 *      sFW -> sLA      FIN seen in both directions.
 246 *      sCW -> sLA
 247 *      sLA -> sLA      Retransmitted FIN.
 248 *      sTW -> sTW
 249 *      sCL -> sCL
 250 */
 251/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 252/*ack*/    { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
 253/*
 254 *      sSS -> sIG      Might be a half-open connection.
 255 *      sS2 -> sIG
 256 *      sSR -> sSR      Might answer late resent SYN.
 257 *      sES -> sES      :-)
 258 *      sFW -> sCW      Normal close request answered by ACK.
 259 *      sCW -> sCW
 260 *      sLA -> sTW      Last ACK detected.
 261 *      sTW -> sTW      Retransmitted last ACK.
 262 *      sCL -> sCL
 263 */
 264/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 265/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
 266/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
 267        }
 268};
 269
 270static inline struct nf_tcp_net *tcp_pernet(struct net *net)
 271{
 272        return &net->ct.nf_ct_proto.tcp;
 273}
 274
 275static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 276                             struct nf_conntrack_tuple *tuple)
 277{
 278        const struct tcphdr *hp;
 279        struct tcphdr _hdr;
 280
 281        /* Actually only need first 8 bytes. */
 282        hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
 283        if (hp == NULL)
 284                return false;
 285
 286        tuple->src.u.tcp.port = hp->source;
 287        tuple->dst.u.tcp.port = hp->dest;
 288
 289        return true;
 290}
 291
 292static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
 293                             const struct nf_conntrack_tuple *orig)
 294{
 295        tuple->src.u.tcp.port = orig->dst.u.tcp.port;
 296        tuple->dst.u.tcp.port = orig->src.u.tcp.port;
 297        return true;
 298}
 299
 300/* Print out the per-protocol part of the tuple. */
 301static int tcp_print_tuple(struct seq_file *s,
 302                           const struct nf_conntrack_tuple *tuple)
 303{
 304        return seq_printf(s, "sport=%hu dport=%hu ",
 305                          ntohs(tuple->src.u.tcp.port),
 306                          ntohs(tuple->dst.u.tcp.port));
 307}
 308
 309/* Print out the private part of the conntrack. */
 310static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 311{
 312        enum tcp_conntrack state;
 313
 314        spin_lock_bh(&ct->lock);
 315        state = ct->proto.tcp.state;
 316        spin_unlock_bh(&ct->lock);
 317
 318        return seq_printf(s, "%s ", tcp_conntrack_names[state]);
 319}
 320
 321static unsigned int get_conntrack_index(const struct tcphdr *tcph)
 322{
 323        if (tcph->rst) return TCP_RST_SET;
 324        else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
 325        else if (tcph->fin) return TCP_FIN_SET;
 326        else if (tcph->ack) return TCP_ACK_SET;
 327        else return TCP_NONE_SET;
 328}
 329
/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
   in IP Filter' by Guido van Rooij.

   http://www.sane.nl/events/sane2000/papers.html
   http://www.darkart.com/mirrors/www.obfuscation.org/ipf/

   The boundaries and the conditions are changed according to RFC793:
   the packet must intersect the window (i.e. segments may be
   after the right or before the left edge) and thus receivers may ACK
   segments after the right edge of the window.

        td_maxend = max(sack + max(win,1)) seen in reply packets
        td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
        td_maxwin += seq + len - sender.td_maxend
                        if seq + len > sender.td_maxend
        td_end    = max(seq + len) seen in sent packets

   I.   Upper bound for valid data:     seq <= sender.td_maxend
   II.  Lower bound for valid data:     seq + len >= sender.td_end - receiver.td_maxwin
   III. Upper bound for valid (s)ack:   sack <= receiver.td_end
   IV.  Lower bound for valid (s)ack:   sack >= receiver.td_end - MAXACKWINDOW

   where sack is the highest right edge of sack block found in the packet
   or ack in the case of packet without SACK option.

   The upper bound limit for a valid (s)ack is not ignored -
   we don't have to deal with fragments.
*/
 358
 359static inline __u32 segment_seq_plus_len(__u32 seq,
 360                                         size_t len,
 361                                         unsigned int dataoff,
 362                                         const struct tcphdr *tcph)
 363{
 364        /* XXX Should I use payload length field in IP/IPv6 header ?
 365         * - YK */
 366        return (seq + len - dataoff - tcph->doff*4
 367                + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
 368}
 369
 370/* Fixme: what about big packets? */
 371#define MAXACKWINCONST                  66000
 372#define MAXACKWINDOW(sender)                                            \
 373        ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin     \
 374                                              : MAXACKWINCONST)
 375
 376/*
 377 * Simplified tcp_parse_options routine from tcp_input.c
 378 */
 379static void tcp_options(const struct sk_buff *skb,
 380                        unsigned int dataoff,
 381                        const struct tcphdr *tcph,
 382                        struct ip_ct_tcp_state *state)
 383{
 384        unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
 385        const unsigned char *ptr;
 386        int length = (tcph->doff*4) - sizeof(struct tcphdr);
 387
 388        if (!length)
 389                return;
 390
 391        ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
 392                                 length, buff);
 393        BUG_ON(ptr == NULL);
 394
 395        state->td_scale =
 396        state->flags = 0;
 397
 398        while (length > 0) {
 399                int opcode=*ptr++;
 400                int opsize;
 401
 402                switch (opcode) {
 403                case TCPOPT_EOL:
 404                        return;
 405                case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
 406                        length--;
 407                        continue;
 408                default:
 409                        opsize=*ptr++;
 410                        if (opsize < 2) /* "silly options" */
 411                                return;
 412                        if (opsize > length)
 413                                return; /* don't parse partial options */
 414
 415                        if (opcode == TCPOPT_SACK_PERM
 416                            && opsize == TCPOLEN_SACK_PERM)
 417                                state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
 418                        else if (opcode == TCPOPT_WINDOW
 419                                 && opsize == TCPOLEN_WINDOW) {
 420                                state->td_scale = *(u_int8_t *)ptr;
 421
 422                                if (state->td_scale > 14) {
 423                                        /* See RFC1323 */
 424                                        state->td_scale = 14;
 425                                }
 426                                state->flags |=
 427                                        IP_CT_TCP_FLAG_WINDOW_SCALE;
 428                        }
 429                        ptr += opsize - 2;
 430                        length -= opsize;
 431                }
 432        }
 433}
 434
 435static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
 436                     const struct tcphdr *tcph, __u32 *sack)
 437{
 438        unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
 439        const unsigned char *ptr;
 440        int length = (tcph->doff*4) - sizeof(struct tcphdr);
 441        __u32 tmp;
 442
 443        if (!length)
 444                return;
 445
 446        ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
 447                                 length, buff);
 448        BUG_ON(ptr == NULL);
 449
 450        /* Fast path for timestamp-only option */
 451        if (length == TCPOLEN_TSTAMP_ALIGNED
 452            && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
 453                                       | (TCPOPT_NOP << 16)
 454                                       | (TCPOPT_TIMESTAMP << 8)
 455                                       | TCPOLEN_TIMESTAMP))
 456                return;
 457
 458        while (length > 0) {
 459                int opcode = *ptr++;
 460                int opsize, i;
 461
 462                switch (opcode) {
 463                case TCPOPT_EOL:
 464                        return;
 465                case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
 466                        length--;
 467                        continue;
 468                default:
 469                        opsize = *ptr++;
 470                        if (opsize < 2) /* "silly options" */
 471                                return;
 472                        if (opsize > length)
 473                                return; /* don't parse partial options */
 474
 475                        if (opcode == TCPOPT_SACK
 476                            && opsize >= (TCPOLEN_SACK_BASE
 477                                          + TCPOLEN_SACK_PERBLOCK)
 478                            && !((opsize - TCPOLEN_SACK_BASE)
 479                                 % TCPOLEN_SACK_PERBLOCK)) {
 480                                for (i = 0;
 481                                     i < (opsize - TCPOLEN_SACK_BASE);
 482                                     i += TCPOLEN_SACK_PERBLOCK) {
 483                                        tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
 484
 485                                        if (after(tmp, *sack))
 486                                                *sack = tmp;
 487                                }
 488                                return;
 489                        }
 490                        ptr += opsize - 2;
 491                        length -= opsize;
 492                }
 493        }
 494}
 495
 496#ifdef CONFIG_NF_NAT_NEEDED
 497static inline s16 nat_offset(const struct nf_conn *ct,
 498                             enum ip_conntrack_dir dir,
 499                             u32 seq)
 500{
 501        typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset);
 502
 503        return get_offset != NULL ? get_offset(ct, dir, seq) : 0;
 504}
 505#define NAT_OFFSET(pf, ct, dir, seq) \
 506        (pf == NFPROTO_IPV4 ? nat_offset(ct, dir, seq) : 0)
 507#else
 508#define NAT_OFFSET(pf, ct, dir, seq)    0
 509#endif
 510
 511static bool tcp_in_window(const struct nf_conn *ct,
 512                          struct ip_ct_tcp *state,
 513                          enum ip_conntrack_dir dir,
 514                          unsigned int index,
 515                          const struct sk_buff *skb,
 516                          unsigned int dataoff,
 517                          const struct tcphdr *tcph,
 518                          u_int8_t pf)
 519{
 520        struct net *net = nf_ct_net(ct);
 521        struct nf_tcp_net *tn = tcp_pernet(net);
 522        struct ip_ct_tcp_state *sender = &state->seen[dir];
 523        struct ip_ct_tcp_state *receiver = &state->seen[!dir];
 524        const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
 525        __u32 seq, ack, sack, end, win, swin;
 526        s16 receiver_offset;
 527        bool res;
 528
 529        /*
 530         * Get the required data from the packet.
 531         */
 532        seq = ntohl(tcph->seq);
 533        ack = sack = ntohl(tcph->ack_seq);
 534        win = ntohs(tcph->window);
 535        end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
 536
 537        if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
 538                tcp_sack(skb, dataoff, tcph, &sack);
 539
 540        /* Take into account NAT sequence number mangling */
 541        receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1);
 542        ack -= receiver_offset;
 543        sack -= receiver_offset;
 544
 545        pr_debug("tcp_in_window: START\n");
 546        pr_debug("tcp_in_window: ");
 547        nf_ct_dump_tuple(tuple);
 548        pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
 549                 seq, ack, receiver_offset, sack, receiver_offset, win, end);
 550        pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
 551                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
 552                 sender->td_end, sender->td_maxend, sender->td_maxwin,
 553                 sender->td_scale,
 554                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
 555                 receiver->td_scale);
 556
 557        if (sender->td_maxwin == 0) {
 558                /*
 559                 * Initialize sender data.
 560                 */
 561                if (tcph->syn) {
 562                        /*
 563                         * SYN-ACK in reply to a SYN
 564                         * or SYN from reply direction in simultaneous open.
 565                         */
 566                        sender->td_end =
 567                        sender->td_maxend = end;
 568                        sender->td_maxwin = (win == 0 ? 1 : win);
 569
 570                        tcp_options(skb, dataoff, tcph, sender);
 571                        /*
 572                         * RFC 1323:
 573                         * Both sides must send the Window Scale option
 574                         * to enable window scaling in either direction.
 575                         */
 576                        if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
 577                              && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
 578                                sender->td_scale =
 579                                receiver->td_scale = 0;
 580                        if (!tcph->ack)
 581                                /* Simultaneous open */
 582                                return true;
 583                } else {
 584                        /*
 585                         * We are in the middle of a connection,
 586                         * its history is lost for us.
 587                         * Let's try to use the data from the packet.
 588                         */
 589                        sender->td_end = end;
 590                        swin = win << sender->td_scale;
 591                        sender->td_maxwin = (swin == 0 ? 1 : swin);
 592                        sender->td_maxend = end + sender->td_maxwin;
 593                        /*
 594                         * We haven't seen traffic in the other direction yet
 595                         * but we have to tweak window tracking to pass III
 596                         * and IV until that happens.
 597                         */
 598                        if (receiver->td_maxwin == 0)
 599                                receiver->td_end = receiver->td_maxend = sack;
 600                }
 601        } else if (((state->state == TCP_CONNTRACK_SYN_SENT
 602                     && dir == IP_CT_DIR_ORIGINAL)
 603                   || (state->state == TCP_CONNTRACK_SYN_RECV
 604                     && dir == IP_CT_DIR_REPLY))
 605                   && after(end, sender->td_end)) {
 606                /*
 607                 * RFC 793: "if a TCP is reinitialized ... then it need
 608                 * not wait at all; it must only be sure to use sequence
 609                 * numbers larger than those recently used."
 610                 */
 611                sender->td_end =
 612                sender->td_maxend = end;
 613                sender->td_maxwin = (win == 0 ? 1 : win);
 614
 615                tcp_options(skb, dataoff, tcph, sender);
 616        }
 617
 618        if (!(tcph->ack)) {
 619                /*
 620                 * If there is no ACK, just pretend it was set and OK.
 621                 */
 622                ack = sack = receiver->td_end;
 623        } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
 624                    (TCP_FLAG_ACK|TCP_FLAG_RST))
 625                   && (ack == 0)) {
 626                /*
 627                 * Broken TCP stacks, that set ACK in RST packets as well
 628                 * with zero ack value.
 629                 */
 630                ack = sack = receiver->td_end;
 631        }
 632
 633        if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
 634                /*
 635                 * RST sent answering SYN.
 636                 */
 637                seq = end = sender->td_end;
 638
 639        pr_debug("tcp_in_window: ");
 640        nf_ct_dump_tuple(tuple);
 641        pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
 642                 seq, ack, receiver_offset, sack, receiver_offset, win, end);
 643        pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
 644                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
 645                 sender->td_end, sender->td_maxend, sender->td_maxwin,
 646                 sender->td_scale,
 647                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
 648                 receiver->td_scale);
 649
 650        pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
 651                 before(seq, sender->td_maxend + 1),
 652                 after(end, sender->td_end - receiver->td_maxwin - 1),
 653                 before(sack, receiver->td_end + 1),
 654                 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
 655
 656        if (before(seq, sender->td_maxend + 1) &&
 657            after(end, sender->td_end - receiver->td_maxwin - 1) &&
 658            before(sack, receiver->td_end + 1) &&
 659            after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
 660                /*
 661                 * Take into account window scaling (RFC 1323).
 662                 */
 663                if (!tcph->syn)
 664                        win <<= sender->td_scale;
 665
 666                /*
 667                 * Update sender data.
 668                 */
 669                swin = win + (sack - ack);
 670                if (sender->td_maxwin < swin)
 671                        sender->td_maxwin = swin;
 672                if (after(end, sender->td_end)) {
 673                        sender->td_end = end;
 674                        sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
 675                }
 676                if (tcph->ack) {
 677                        if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
 678                                sender->td_maxack = ack;
 679                                sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
 680                        } else if (after(ack, sender->td_maxack))
 681                                sender->td_maxack = ack;
 682                }
 683
 684                /*
 685                 * Update receiver data.
 686                 */
 687                if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
 688                        receiver->td_maxwin += end - sender->td_maxend;
 689                if (after(sack + win, receiver->td_maxend - 1)) {
 690                        receiver->td_maxend = sack + win;
 691                        if (win == 0)
 692                                receiver->td_maxend++;
 693                }
 694                if (ack == receiver->td_end)
 695                        receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
 696
 697                /*
 698                 * Check retransmissions.
 699                 */
 700                if (index == TCP_ACK_SET) {
 701                        if (state->last_dir == dir
 702                            && state->last_seq == seq
 703                            && state->last_ack == ack
 704                            && state->last_end == end
 705                            && state->last_win == win)
 706                                state->retrans++;
 707                        else {
 708                                state->last_dir = dir;
 709                                state->last_seq = seq;
 710                                state->last_ack = ack;
 711                                state->last_end = end;
 712                                state->last_win = win;
 713                                state->retrans = 0;
 714                        }
 715                }
 716                res = true;
 717        } else {
 718                res = false;
 719                if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
 720                    tn->tcp_be_liberal)
 721                        res = true;
 722                if (!res && LOG_INVALID(net, IPPROTO_TCP))
 723                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 724                        "nf_ct_tcp: %s ",
 725                        before(seq, sender->td_maxend + 1) ?
 726                        after(end, sender->td_end - receiver->td_maxwin - 1) ?
 727                        before(sack, receiver->td_end + 1) ?
 728                        after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
 729                        : "ACK is under the lower bound (possible overly delayed ACK)"
 730                        : "ACK is over the upper bound (ACKed data not seen yet)"
 731                        : "SEQ is under the lower bound (already ACKed data retransmitted)"
 732                        : "SEQ is over the upper bound (over the window of the receiver)");
 733        }
 734
 735        pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
 736                 "receiver end=%u maxend=%u maxwin=%u\n",
 737                 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
 738                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
 739
 740        return res;
 741}
 742
 743/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
 744static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
 745                                 TCPHDR_URG) + 1] =
 746{
 747        [TCPHDR_SYN]                            = 1,
 748        [TCPHDR_SYN|TCPHDR_URG]                 = 1,
 749        [TCPHDR_SYN|TCPHDR_ACK]                 = 1,
 750        [TCPHDR_RST]                            = 1,
 751        [TCPHDR_RST|TCPHDR_ACK]                 = 1,
 752        [TCPHDR_FIN|TCPHDR_ACK]                 = 1,
 753        [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]      = 1,
 754        [TCPHDR_ACK]                            = 1,
 755        [TCPHDR_ACK|TCPHDR_URG]                 = 1,
 756};
 757
 758/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
 759static int tcp_error(struct net *net, struct nf_conn *tmpl,
 760                     struct sk_buff *skb,
 761                     unsigned int dataoff,
 762                     enum ip_conntrack_info *ctinfo,
 763                     u_int8_t pf,
 764                     unsigned int hooknum)
 765{
 766        const struct tcphdr *th;
 767        struct tcphdr _tcph;
 768        unsigned int tcplen = skb->len - dataoff;
 769        u_int8_t tcpflags;
 770
 771        /* Smaller that minimal TCP header? */
 772        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
 773        if (th == NULL) {
 774                if (LOG_INVALID(net, IPPROTO_TCP))
 775                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 776                                "nf_ct_tcp: short packet ");
 777                return -NF_ACCEPT;
 778        }
 779
 780        /* Not whole TCP header or malformed packet */
 781        if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
 782                if (LOG_INVALID(net, IPPROTO_TCP))
 783                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 784                                "nf_ct_tcp: truncated/malformed packet ");
 785                return -NF_ACCEPT;
 786        }
 787
 788        /* Checksum invalid? Ignore.
 789         * We skip checking packets on the outgoing path
 790         * because the checksum is assumed to be correct.
 791         */
 792        /* FIXME: Source route IP option packets --RR */
 793        if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 794            nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
 795                if (LOG_INVALID(net, IPPROTO_TCP))
 796                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 797                                  "nf_ct_tcp: bad TCP checksum ");
 798                return -NF_ACCEPT;
 799        }
 800
 801        /* Check TCP flags. */
 802        tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
 803        if (!tcp_valid_flags[tcpflags]) {
 804                if (LOG_INVALID(net, IPPROTO_TCP))
 805                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 806                                  "nf_ct_tcp: invalid TCP flag combination ");
 807                return -NF_ACCEPT;
 808        }
 809
 810        return NF_ACCEPT;
 811}
 812
 813static unsigned int *tcp_get_timeouts(struct net *net)
 814{
 815        return tcp_pernet(net)->timeouts;
 816}
 817
 818/* Returns verdict for packet, or -1 for invalid.
     *
     * Per-packet TCP state tracking for an existing conntrack entry.
     * While holding ct->lock the function:
     *  - looks up the candidate new state in
     *    tcp_conntracks[dir][index][old_state];
     *  - handles the special cases in the switch below (SYN on a closing
     *    or aborted connection, ignored packets, invalid transitions and
     *    RST validation);
     *  - runs tcp_in_window(), unless the RST shortcut jumped straight to
     *    the in_window label;
     *  - stores the new state and chooses the timeout.
     * After dropping the lock it caches events, may set IPS_ASSURED and
     * refreshes the entry (or kills it for a RST when no reply was seen).
     *
     * Return values used below: NF_ACCEPT, -NF_ACCEPT (invalid packet),
     * -NF_REPEAT (entry killed so the packet is looked up again) and
     * NF_DROP (nf_ct_kill() failed while trying to reopen the connection).
     */
 819static int tcp_packet(struct nf_conn *ct,
 820                      const struct sk_buff *skb,
 821                      unsigned int dataoff,
 822                      enum ip_conntrack_info ctinfo,
 823                      u_int8_t pf,
 824                      unsigned int hooknum,
 825                      unsigned int *timeouts)
 826{
 827        struct net *net = nf_ct_net(ct);
 828        struct nf_tcp_net *tn = tcp_pernet(net);
 829        struct nf_conntrack_tuple *tuple;
 830        enum tcp_conntrack new_state, old_state;
 831        enum ip_conntrack_dir dir;
 832        const struct tcphdr *th;
 833        struct tcphdr _tcph;
 834        unsigned long timeout;
 835        unsigned int index;
 836
 837        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
            /* NOTE(review): presumably tcp_error() has already rejected
             * packets without a full TCP header, so NULL here would be a
             * bug - confirm against the caller. */
 838        BUG_ON(th == NULL);
 839
            /* ct->lock is held from here until each exit path below
             * explicitly unlocks it. */
 840        spin_lock_bh(&ct->lock);
 841        old_state = ct->proto.tcp.state;
 842        dir = CTINFO2DIR(ctinfo);
 843        index = get_conntrack_index(th);
 844        new_state = tcp_conntracks[dir][index][old_state];
 845        tuple = &ct->tuplehash[dir].tuple;
 846
 847        switch (new_state) {
 848        case TCP_CONNTRACK_SYN_SENT:
                    /* Old states below TIME_WAIT need no special handling:
                     * leave the switch and go on to the window check. */
 849                if (old_state < TCP_CONNTRACK_TIME_WAIT)
 850                        break;
 851                /* RFC 1122: "When a connection is closed actively,
 852                 * it MUST linger in TIME-WAIT state for a time 2xMSL
 853                 * (Maximum Segment Lifetime). However, it MAY accept
 854                 * a new SYN from the remote TCP to reopen the connection
 855                 * directly from TIME-WAIT state, if..."
 856                 * We ignore the conditions because we are in the
 857                 * TIME-WAIT state anyway.
 858                 *
 859                 * Handle aborted connections: we and the server
 860                 * think there is an existing connection but the client
 861                 * aborts it and starts a new one.
 862                 */
 863                if (((ct->proto.tcp.seen[dir].flags
 864                      | ct->proto.tcp.seen[!dir].flags)
 865                     & IP_CT_TCP_FLAG_CLOSE_INIT)
 866                    || (ct->proto.tcp.last_dir == dir
 867                        && ct->proto.tcp.last_index == TCP_RST_SET)) {
 868                        /* Attempt to reopen a closed/aborted connection.
 869                         * Delete this connection and look up again. */
 870                        spin_unlock_bh(&ct->lock);
 871
 872                        /* Only repeat if we can actually remove the timer.
 873                         * Destruction may already be in progress in process
 874                         * context and we must give it a chance to terminate.
 875                         */
 876                        if (nf_ct_kill(ct))
 877                                return -NF_REPEAT;
 878                        return NF_DROP;
 879                }
 880                /* Fall through */
 881        case TCP_CONNTRACK_IGNORE:
 882                /* Ignored packets:
 883                 *
 884                 * Our connection entry may be out of sync, so ignore
 885                 * packets which may signal the real connection between
 886                 * the client and the server.
 887                 *
 888                 * a) SYN in ORIGINAL
 889                 * b) SYN/ACK in REPLY
 890                 * c) ACK in reply direction after initial SYN in original.
 891                 *
 892                 * If the ignored packet is invalid, the receiver will send
 893                 * a RST we'll catch below.
 894                 */
 895                if (index == TCP_SYNACK_SET
 896                    && ct->proto.tcp.last_index == TCP_SYN_SET
 897                    && ct->proto.tcp.last_dir != dir
 898                    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
 899                        /* b) This SYN/ACK acknowledges a SYN that we earlier
 900                         * ignored as invalid. This means that the client and
 901                         * the server are both in sync, while the firewall is
 902                         * not. We get in sync from the previously annotated
 903                         * values.
 904                         */
 905                        old_state = TCP_CONNTRACK_SYN_SENT;
 906                        new_state = TCP_CONNTRACK_SYN_RECV;
 907                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
 908                                ct->proto.tcp.last_end;
 909                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
 910                                ct->proto.tcp.last_end;
 911                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
 912                                ct->proto.tcp.last_win == 0 ?
 913                                        1 : ct->proto.tcp.last_win;
 914                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
 915                                ct->proto.tcp.last_wscale;
 916                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
 917                                ct->proto.tcp.last_flags;
 918                        memset(&ct->proto.tcp.seen[dir], 0,
 919                               sizeof(struct ip_ct_tcp_state));
 920                        break;
 921                }
                    /* Record this ignored packet so that a later SYN/ACK
                     * (case b above) or RST (TCP_CONNTRACK_CLOSE below) can
                     * be matched against it. */
 922                ct->proto.tcp.last_index = index;
 923                ct->proto.tcp.last_dir = dir;
 924                ct->proto.tcp.last_seq = ntohl(th->seq);
 925                ct->proto.tcp.last_end =
 926                    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
 927                ct->proto.tcp.last_win = ntohs(th->window);
 928
 929                /* a) This is a SYN in ORIGINAL. The client and the server
 930                 * may be in sync but we are not. In that case, we annotate
 931                 * the TCP options and let the packet go through. If it is a
 932                 * valid SYN packet, the server will reply with a SYN/ACK, and
 933                 * then we'll get in sync. Otherwise, the server ignores it. */
 934                if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
 935                        struct ip_ct_tcp_state seen = {};
 936
 937                        ct->proto.tcp.last_flags =
 938                        ct->proto.tcp.last_wscale = 0;
 939                        tcp_options(skb, dataoff, th, &seen);
 940                        if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
 941                                ct->proto.tcp.last_flags |=
 942                                        IP_CT_TCP_FLAG_WINDOW_SCALE;
 943                                ct->proto.tcp.last_wscale = seen.td_scale;
 944                        }
 945                        if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
 946                                ct->proto.tcp.last_flags |=
 947                                        IP_CT_TCP_FLAG_SACK_PERM;
 948                        }
 949                }
 950                spin_unlock_bh(&ct->lock);
 951                if (LOG_INVALID(net, IPPROTO_TCP))
 952                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 953                                  "nf_ct_tcp: invalid packet ignored in "
 954                                  "state %s ", tcp_conntrack_names[old_state]);
 955                return NF_ACCEPT;
 956        case TCP_CONNTRACK_MAX:
 957                /* Invalid packet */
 958                pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
 959                         dir, get_conntrack_index(th), old_state);
 960                spin_unlock_bh(&ct->lock);
 961                if (LOG_INVALID(net, IPPROTO_TCP))
 962                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 963                                  "nf_ct_tcp: invalid state ");
 964                return -NF_ACCEPT;
 965        case TCP_CONNTRACK_CLOSE:
                    /* A RST whose sequence number lies before the highest
                     * ACK recorded for the other direction is rejected. */
 966                if (index == TCP_RST_SET
 967                    && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
 968                    && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
 969                        /* Invalid RST  */
 970                        spin_unlock_bh(&ct->lock);
 971                        if (LOG_INVALID(net, IPPROTO_TCP))
 972                                nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 973                                          "nf_ct_tcp: invalid RST ");
 974                        return -NF_ACCEPT;
 975                }
 976                if (index == TCP_RST_SET
 977                    && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
 978                         && ct->proto.tcp.last_index == TCP_SYN_SET)
 979                        || (!test_bit(IPS_ASSURED_BIT, &ct->status)
 980                            && ct->proto.tcp.last_index == TCP_ACK_SET))
 981                    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
 982                        /* RST sent to invalid SYN or ACK we had let through
 983                         * at a) and c) above:
 984                         *
 985                         * a) SYN was in window then
 986                         * c) we hold a half-open connection.
 987                         *
 988                         * Delete our connection entry.
 989                         * We skip window checking, because packet might ACK
 990                         * segments we ignored. */
 991                        goto in_window;
 992                }
 993                /* Just fall through */
 994        default:
 995                /* Keep compilers happy. */
 996                break;
 997        }
 998
 999        if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1000                           skb, dataoff, th, pf)) {
1001                spin_unlock_bh(&ct->lock);
1002                return -NF_ACCEPT;
1003        }
1004     in_window:
1005        /* From now on we have got in-window packets */
1006        ct->proto.tcp.last_index = index;
1007        ct->proto.tcp.last_dir = dir;
1008
1009        pr_debug("tcp_conntracks: ");
1010        nf_ct_dump_tuple(tuple);
1011        pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1012                 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1013                 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1014                 old_state, new_state);
1015
1016        ct->proto.tcp.state = new_state;
            /* Mark the direction that moved the connection into FIN_WAIT;
             * the reopen check in the SYN_SENT case above tests this flag. */
1017        if (old_state != new_state
1018            && new_state == TCP_CONNTRACK_FIN_WAIT)
1019                ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1020
            /* Choose the timeout: the RETRANS timeout once the
             * retransmission counter has reached tn->tcp_max_retrans, else
             * the UNACK timeout while either direction is flagged
             * DATA_UNACKNOWLEDGED - each only when it is shorter than the
             * new state's timeout - otherwise the new state's own timeout. */
1021        if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1022            timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1023                timeout = timeouts[TCP_CONNTRACK_RETRANS];
1024        else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1025                 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1026                 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1027                timeout = timeouts[TCP_CONNTRACK_UNACK];
1028        else
1029                timeout = timeouts[new_state];
1030        spin_unlock_bh(&ct->lock);
1031
1032        if (new_state != old_state)
1033                nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1034
1035        if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1036                /* If only reply is a RST, we can consider ourselves not to
1037                   have an established connection: this is a fairly common
1038                   problem case, so we can delete the conntrack
1039                   immediately.  --RR */
1040                if (th->rst) {
1041                        nf_ct_kill_acct(ct, ctinfo, skb);
1042                        return NF_ACCEPT;
1043                }
1044        } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1045                   && (old_state == TCP_CONNTRACK_SYN_RECV
1046                       || old_state == TCP_CONNTRACK_ESTABLISHED)
1047                   && new_state == TCP_CONNTRACK_ESTABLISHED) {
1048                /* Set ASSURED if we see a valid ack in ESTABLISHED
1049                   after SYN_RECV or a valid answer for a picked up
1050                   connection. */
1051                set_bit(IPS_ASSURED_BIT, &ct->status);
1052                nf_conntrack_event_cache(IPCT_ASSURED, ct);
1053        }
1054        nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1055
1056        return NF_ACCEPT;
1057}
1058
1059/* Called when a new connection for this protocol found. */
1060static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1061                    unsigned int dataoff, unsigned int *timeouts)
1062{
1063        enum tcp_conntrack new_state;
1064        const struct tcphdr *th;
1065        struct tcphdr _tcph;
1066        struct net *net = nf_ct_net(ct);
1067        struct nf_tcp_net *tn = tcp_pernet(net);
1068        const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1069        const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1070
1071        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1072        BUG_ON(th == NULL);
1073
1074        /* Don't need lock here: this conntrack not in circulation yet */
1075        new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1076
1077        /* Invalid: delete conntrack */
1078        if (new_state >= TCP_CONNTRACK_MAX) {
1079                pr_debug("nf_ct_tcp: invalid new deleting.\n");
1080                return false;
1081        }
1082
1083        if (new_state == TCP_CONNTRACK_SYN_SENT) {
1084                memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1085                /* SYN packet */
1086                ct->proto.tcp.seen[0].td_end =
1087                        segment_seq_plus_len(ntohl(th->seq), skb->len,
1088                                             dataoff, th);
1089                ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1090                if (ct->proto.tcp.seen[0].td_maxwin == 0)
1091                        ct->proto.tcp.seen[0].td_maxwin = 1;
1092                ct->proto.tcp.seen[0].td_maxend =
1093                        ct->proto.tcp.seen[0].td_end;
1094
1095                tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1096        } else if (tn->tcp_loose == 0) {
1097                /* Don't try to pick up connections. */
1098                return false;
1099        } else {
1100                memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1101                /*
1102                 * We are in the middle of a connection,
1103                 * its history is lost for us.
1104                 * Let's try to use the data from the packet.
1105                 */
1106                ct->proto.tcp.seen[0].td_end =
1107                        segment_seq_plus_len(ntohl(th->seq), skb->len,
1108                                             dataoff, th);
1109                ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1110                if (ct->proto.tcp.seen[0].td_maxwin == 0)
1111                        ct->proto.tcp.seen[0].td_maxwin = 1;
1112                ct->proto.tcp.seen[0].td_maxend =
1113                        ct->proto.tcp.seen[0].td_end +
1114                        ct->proto.tcp.seen[0].td_maxwin;
1115
1116                /* We assume SACK and liberal window checking to handle
1117                 * window scaling */
1118                ct->proto.tcp.seen[0].flags =
1119                ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1120                                              IP_CT_TCP_FLAG_BE_LIBERAL;
1121        }
1122
1123        /* tcp_packet will set them */
1124        ct->proto.tcp.last_index = TCP_NONE_SET;
1125
1126        pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1127                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1128                 sender->td_end, sender->td_maxend, sender->td_maxwin,
1129                 sender->td_scale,
1130                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1131                 receiver->td_scale);
1132        return true;
1133}
1134
1135#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1136
1137#include <linux/netfilter/nfnetlink.h>
1138#include <linux/netfilter/nfnetlink_conntrack.h>
1139
1140static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1141                         struct nf_conn *ct)
1142{
1143        struct nlattr *nest_parms;
1144        struct nf_ct_tcp_flags tmp = {};
1145
1146        spin_lock_bh(&ct->lock);
1147        nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1148        if (!nest_parms)
1149                goto nla_put_failure;
1150
1151        if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1152            nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1153                       ct->proto.tcp.seen[0].td_scale) ||
1154            nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1155                       ct->proto.tcp.seen[1].td_scale))
1156                goto nla_put_failure;
1157
1158        tmp.flags = ct->proto.tcp.seen[0].flags;
1159        if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1160                    sizeof(struct nf_ct_tcp_flags), &tmp))
1161                goto nla_put_failure;
1162
1163        tmp.flags = ct->proto.tcp.seen[1].flags;
1164        if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1165                    sizeof(struct nf_ct_tcp_flags), &tmp))
1166                goto nla_put_failure;
1167        spin_unlock_bh(&ct->lock);
1168
1169        nla_nest_end(skb, nest_parms);
1170
1171        return 0;
1172
1173nla_put_failure:
1174        spin_unlock_bh(&ct->lock);
1175        return -1;
1176}
1177
1178static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1179        [CTA_PROTOINFO_TCP_STATE]           = { .type = NLA_U8 },
1180        [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1181        [CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
1182        [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
1183        [CTA_PROTOINFO_TCP_FLAGS_REPLY]     = { .len =  sizeof(struct nf_ct_tcp_flags) },
1184};
1185
1186static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1187{
1188        struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1189        struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1190        int err;
1191
1192        /* updates could not contain anything about the private
1193         * protocol info, in that case skip the parsing */
1194        if (!pattr)
1195                return 0;
1196
1197        err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
1198        if (err < 0)
1199                return err;
1200
1201        if (tb[CTA_PROTOINFO_TCP_STATE] &&
1202            nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1203                return -EINVAL;
1204
1205        spin_lock_bh(&ct->lock);
1206        if (tb[CTA_PROTOINFO_TCP_STATE])
1207                ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1208
1209        if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1210                struct nf_ct_tcp_flags *attr =
1211                        nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1212                ct->proto.tcp.seen[0].flags &= ~attr->mask;
1213                ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1214        }
1215
1216        if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1217                struct nf_ct_tcp_flags *attr =
1218                        nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1219                ct->proto.tcp.seen[1].flags &= ~attr->mask;
1220                ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1221        }
1222
1223        if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1224            tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1225            ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1226            ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1227                ct->proto.tcp.seen[0].td_scale =
1228                        nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1229                ct->proto.tcp.seen[1].td_scale =
1230                        nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1231        }
1232        spin_unlock_bh(&ct->lock);
1233
1234        return 0;
1235}
1236
1237static int tcp_nlattr_size(void)
1238{
1239        return nla_total_size(0)           /* CTA_PROTOINFO_TCP */
1240                + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1241}
1242
1243static int tcp_nlattr_tuple_size(void)
1244{
1245        return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1246}
1247#endif
1248
1249#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1250
1251#include <linux/netfilter/nfnetlink.h>
1252#include <linux/netfilter/nfnetlink_cttimeout.h>
1253
1254static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1255                                     struct net *net, void *data)
1256{
1257        unsigned int *timeouts = data;
1258        struct nf_tcp_net *tn = tcp_pernet(net);
1259        int i;
1260
1261        /* set default TCP timeouts. */
1262        for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1263                timeouts[i] = tn->timeouts[i];
1264
1265        if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1266                timeouts[TCP_CONNTRACK_SYN_SENT] =
1267                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1268        }
1269        if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1270                timeouts[TCP_CONNTRACK_SYN_RECV] =
1271                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1272        }
1273        if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1274                timeouts[TCP_CONNTRACK_ESTABLISHED] =
1275                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1276        }
1277        if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1278                timeouts[TCP_CONNTRACK_FIN_WAIT] =
1279                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1280        }
1281        if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1282                timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1283                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1284        }
1285        if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1286                timeouts[TCP_CONNTRACK_LAST_ACK] =
1287                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1288        }
1289        if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1290                timeouts[TCP_CONNTRACK_TIME_WAIT] =
1291                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1292        }
1293        if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1294                timeouts[TCP_CONNTRACK_CLOSE] =
1295                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1296        }
1297        if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1298                timeouts[TCP_CONNTRACK_SYN_SENT2] =
1299                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1300        }
1301        if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1302                timeouts[TCP_CONNTRACK_RETRANS] =
1303                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1304        }
1305        if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1306                timeouts[TCP_CONNTRACK_UNACK] =
1307                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1308        }
1309        return 0;
1310}
1311
1312static int
1313tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1314{
1315        const unsigned int *timeouts = data;
1316
1317        if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1318                        htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1319            nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1320                         htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1321            nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1322                         htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1323            nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1324                         htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1325            nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1326                         htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1327            nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1328                         htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1329            nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1330                         htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1331            nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1332                         htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1333            nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1334                         htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1335            nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1336                         htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1337            nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1338                         htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1339                goto nla_put_failure;
1340        return 0;
1341
1342nla_put_failure:
1343        return -ENOSPC;
1344}
1345
1346static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1347        [CTA_TIMEOUT_TCP_SYN_SENT]      = { .type = NLA_U32 },
1348        [CTA_TIMEOUT_TCP_SYN_RECV]      = { .type = NLA_U32 },
1349        [CTA_TIMEOUT_TCP_ESTABLISHED]   = { .type = NLA_U32 },
1350        [CTA_TIMEOUT_TCP_FIN_WAIT]      = { .type = NLA_U32 },
1351        [CTA_TIMEOUT_TCP_CLOSE_WAIT]    = { .type = NLA_U32 },
1352        [CTA_TIMEOUT_TCP_LAST_ACK]      = { .type = NLA_U32 },
1353        [CTA_TIMEOUT_TCP_TIME_WAIT]     = { .type = NLA_U32 },
1354        [CTA_TIMEOUT_TCP_CLOSE]         = { .type = NLA_U32 },
1355        [CTA_TIMEOUT_TCP_SYN_SENT2]     = { .type = NLA_U32 },
1356};
1357#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1358
1359#ifdef CONFIG_SYSCTL
1360static struct ctl_table tcp_sysctl_table[] = {
1361        {
1362                .procname       = "nf_conntrack_tcp_timeout_syn_sent",
1363                .maxlen         = sizeof(unsigned int),
1364                .mode           = 0644,
1365                .proc_handler   = proc_dointvec_jiffies,
1366        },
1367        {
1368                .procname       = "nf_conntrack_tcp_timeout_syn_recv",
1369                .maxlen         = sizeof(unsigned int),
1370                .mode           = 0644,
1371                .proc_handler   = proc_dointvec_jiffies,
1372        },
1373        {
1374                .procname       = "nf_conntrack_tcp_timeout_established",
1375                .maxlen         = sizeof(unsigned int),
1376                .mode           = 0644,
1377                .proc_handler   = proc_dointvec_jiffies,
1378        },
1379        {
1380                .procname       = "nf_conntrack_tcp_timeout_fin_wait",
1381                .maxlen         = sizeof(unsigned int),
1382                .mode           = 0644,
1383                .proc_handler   = proc_dointvec_jiffies,
1384        },
1385        {
1386                .procname       = "nf_conntrack_tcp_timeout_close_wait",
1387                .maxlen         = sizeof(unsigned int),
1388                .mode           = 0644,
1389                .proc_handler   = proc_dointvec_jiffies,
1390        },
1391        {
1392                .procname       = "nf_conntrack_tcp_timeout_last_ack",
1393                .maxlen         = sizeof(unsigned int),
1394                .mode           = 0644,
1395                .proc_handler   = proc_dointvec_jiffies,
1396        },
1397        {
1398                .procname       = "nf_conntrack_tcp_timeout_time_wait",
1399                .maxlen         = sizeof(unsigned int),
1400                .mode           = 0644,
1401                .proc_handler   = proc_dointvec_jiffies,
1402        },
1403        {
1404                .procname       = "nf_conntrack_tcp_timeout_close",
1405                .maxlen         = sizeof(unsigned int),
1406                .mode           = 0644,
1407                .proc_handler   = proc_dointvec_jiffies,
1408        },
1409        {
1410                .procname       = "nf_conntrack_tcp_timeout_max_retrans",
1411                .maxlen         = sizeof(unsigned int),
1412                .mode           = 0644,
1413                .proc_handler   = proc_dointvec_jiffies,
1414        },
1415        {
1416                .procname       = "nf_conntrack_tcp_timeout_unacknowledged",
1417                .maxlen         = sizeof(unsigned int),
1418                .mode           = 0644,
1419                .proc_handler   = proc_dointvec_jiffies,
1420        },
1421        {
1422                .procname       = "nf_conntrack_tcp_loose",
1423                .maxlen         = sizeof(unsigned int),
1424                .mode           = 0644,
1425                .proc_handler   = proc_dointvec,
1426        },
1427        {
1428                .procname       = "nf_conntrack_tcp_be_liberal",
1429                .maxlen         = sizeof(unsigned int),
1430                .mode           = 0644,
1431                .proc_handler   = proc_dointvec,
1432        },
1433        {
1434                .procname       = "nf_conntrack_tcp_max_retrans",
1435                .maxlen         = sizeof(unsigned int),
1436                .mode           = 0644,
1437                .proc_handler   = proc_dointvec,
1438        },
1439        { }
1440};
1441
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
/*
 * Template for the compat "ip_conntrack_tcp_*" sysctl entries.
 *
 * No ->data pointer is set here: tcp_kmemdup_compat_sysctl_table() copies
 * this array and fills ->data in by array index, so the order of the
 * entries below has to stay in step with the indices used there.
 */

/* Entry whose value is handled by proc_dointvec_jiffies. */
#define TCP_COMPAT_TIMEOUT(name)				\
	{							\
		.procname	= name,				\
		.mode		= 0644,				\
		.maxlen		= sizeof(unsigned int),		\
		.proc_handler	= proc_dointvec_jiffies,	\
	}

/* Entry whose value is handled by proc_dointvec. */
#define TCP_COMPAT_INT(name)					\
	{							\
		.procname	= name,				\
		.mode		= 0644,				\
		.maxlen		= sizeof(unsigned int),		\
		.proc_handler	= proc_dointvec,		\
	}

static struct ctl_table tcp_compat_sysctl_table[] = {
	/* [0] .. [9] */
	TCP_COMPAT_TIMEOUT("ip_conntrack_tcp_timeout_syn_sent"),
	TCP_COMPAT_TIMEOUT("ip_conntrack_tcp_timeout_syn_sent2"),
	TCP_COMPAT_TIMEOUT("ip_conntrack_tcp_timeout_syn_recv"),
	TCP_COMPAT_TIMEOUT("ip_conntrack_tcp_timeout_established"),
	TCP_COMPAT_TIMEOUT("ip_conntrack_tcp_timeout_fin_wait"),
	TCP_COMPAT_TIMEOUT("ip_conntrack_tcp_timeout_close_wait"),
	TCP_COMPAT_TIMEOUT("ip_conntrack_tcp_timeout_last_ack"),
	TCP_COMPAT_TIMEOUT("ip_conntrack_tcp_timeout_time_wait"),
	TCP_COMPAT_TIMEOUT("ip_conntrack_tcp_timeout_close"),
	TCP_COMPAT_TIMEOUT("ip_conntrack_tcp_timeout_max_retrans"),
	/* [10] .. [12] */
	TCP_COMPAT_INT("ip_conntrack_tcp_loose"),
	TCP_COMPAT_INT("ip_conntrack_tcp_be_liberal"),
	TCP_COMPAT_INT("ip_conntrack_tcp_max_retrans"),
	{ }	/* empty entry ends the table */
};

#undef TCP_COMPAT_INT
#undef TCP_COMPAT_TIMEOUT
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1525#endif /* CONFIG_SYSCTL */
1526
/*
 * Duplicate tcp_sysctl_table into pn->ctl_table and point the ->data of
 * every copied entry at the matching field of @tn.
 *
 * Nothing is done when pn->ctl_table is already set, or when CONFIG_SYSCTL
 * is not defined.
 *
 * Returns 0 on success and -ENOMEM if kmemdup() fails.
 */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
				    struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *table;

	if (pn->ctl_table)
		return 0;

	table = kmemdup(tcp_sysctl_table, sizeof(tcp_sysctl_table),
			GFP_KERNEL);
	pn->ctl_table = table;
	if (!table)
		return -ENOMEM;

	/* Index N has to address the N-th entry of tcp_sysctl_table. */
	table[0].data  = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	table[1].data  = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	table[2].data  = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	table[3].data  = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	table[4].data  = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	table[5].data  = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	table[6].data  = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	table[7].data  = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	table[8].data  = &tn->timeouts[TCP_CONNTRACK_RETRANS];
	table[9].data  = &tn->timeouts[TCP_CONNTRACK_UNACK];
	table[10].data = &tn->tcp_loose;
	table[11].data = &tn->tcp_be_liberal;
	table[12].data = &tn->tcp_max_retrans;
#endif
	return 0;
}
1556
/*
 * Duplicate tcp_compat_sysctl_table into pn->ctl_compat_table and point
 * the ->data of every copied entry at the matching field of @tn.
 *
 * Compiles down to "return 0" unless both CONFIG_SYSCTL and
 * CONFIG_NF_CONNTRACK_PROC_COMPAT are defined.
 *
 * Returns 0 on success and -ENOMEM if kmemdup() fails.
 */
static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
					   struct nf_tcp_net *tn)
{
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
	struct ctl_table *table;

	table = kmemdup(tcp_compat_sysctl_table,
			sizeof(tcp_compat_sysctl_table), GFP_KERNEL);
	pn->ctl_compat_table = table;
	if (!table)
		return -ENOMEM;

	/* Index N has to address the N-th entry of tcp_compat_sysctl_table. */
	table[0].data  = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	table[1].data  = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2];
	table[2].data  = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	table[3].data  = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	table[4].data  = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	table[5].data  = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	table[6].data  = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	table[7].data  = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	table[8].data  = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	table[9].data  = &tn->timeouts[TCP_CONNTRACK_RETRANS];
	table[10].data = &tn->tcp_loose;
	table[11].data = &tn->tcp_be_liberal;
	table[12].data = &tn->tcp_max_retrans;
#endif
	return 0;
}
1585
1586static int tcp_init_net(struct net *net, u_int16_t proto)
1587{
1588        int ret;
1589        struct nf_tcp_net *tn = tcp_pernet(net);
1590        struct nf_proto_net *pn = &tn->pn;
1591
1592        if (!pn->users) {
1593                int i;
1594
1595                for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1596                        tn->timeouts[i] = tcp_timeouts[i];
1597
1598                tn->tcp_loose = nf_ct_tcp_loose;
1599                tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1600                tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1601        }
1602
1603        if (proto == AF_INET) {
1604                ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
1605                if (ret < 0)
1606                        return ret;
1607
1608                ret = tcp_kmemdup_sysctl_table(pn, tn);
1609                if (ret < 0)
1610                        nf_ct_kfree_compat_sysctl_table(pn);
1611        } else
1612                ret = tcp_kmemdup_sysctl_table(pn, tn);
1613
1614        return ret;
1615}
1616
1617static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1618{
1619        return &net->ct.nf_ct_proto.tcp.pn;
1620}
1621
1622struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1623{
1624        .l3proto                = PF_INET,
1625        .l4proto                = IPPROTO_TCP,
1626        .name                   = "tcp",
1627        .pkt_to_tuple           = tcp_pkt_to_tuple,
1628        .invert_tuple           = tcp_invert_tuple,
1629        .print_tuple            = tcp_print_tuple,
1630        .print_conntrack        = tcp_print_conntrack,
1631        .packet                 = tcp_packet,
1632        .get_timeouts           = tcp_get_timeouts,
1633        .new                    = tcp_new,
1634        .error                  = tcp_error,
1635#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1636        .to_nlattr              = tcp_to_nlattr,
1637        .nlattr_size            = tcp_nlattr_size,
1638        .from_nlattr            = nlattr_to_tcp,
1639        .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1640        .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1641        .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1642        .nla_policy             = nf_ct_port_nla_policy,
1643#endif
1644#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1645        .ctnl_timeout           = {
1646                .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1647                .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1648                .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1649                .obj_size       = sizeof(unsigned int) *
1650                                        TCP_CONNTRACK_TIMEOUT_MAX,
1651                .nla_policy     = tcp_timeout_nla_policy,
1652        },
1653#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1654        .init_net               = tcp_init_net,
1655        .get_net_proto          = tcp_get_net_proto,
1656};
1657EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1658
1659struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1660{
1661        .l3proto                = PF_INET6,
1662        .l4proto                = IPPROTO_TCP,
1663        .name                   = "tcp",
1664        .pkt_to_tuple           = tcp_pkt_to_tuple,
1665        .invert_tuple           = tcp_invert_tuple,
1666        .print_tuple            = tcp_print_tuple,
1667        .print_conntrack        = tcp_print_conntrack,
1668        .packet                 = tcp_packet,
1669        .get_timeouts           = tcp_get_timeouts,
1670        .new                    = tcp_new,
1671        .error                  = tcp_error,
1672#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1673        .to_nlattr              = tcp_to_nlattr,
1674        .nlattr_size            = tcp_nlattr_size,
1675        .from_nlattr            = nlattr_to_tcp,
1676        .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1677        .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1678        .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1679        .nla_policy             = nf_ct_port_nla_policy,
1680#endif
1681#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1682        .ctnl_timeout           = {
1683                .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1684                .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1685                .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1686                .obj_size       = sizeof(unsigned int) *
1687                                        TCP_CONNTRACK_TIMEOUT_MAX,
1688                .nla_policy     = tcp_timeout_nla_policy,
1689        },
1690#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1691        .init_net               = tcp_init_net,
1692        .get_net_proto          = tcp_get_net_proto,
1693};
1694EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
1695