linux/net/netfilter/nf_conntrack_proto_tcp.c
<<
>>
Prefs
   1/* (C) 1999-2001 Paul `Rusty' Russell
   2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
   3 * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
   4 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 as
   8 * published by the Free Software Foundation.
   9 */
  10
  11#include <linux/types.h>
  12#include <linux/timer.h>
  13#include <linux/module.h>
  14#include <linux/in.h>
  15#include <linux/tcp.h>
  16#include <linux/spinlock.h>
  17#include <linux/skbuff.h>
  18#include <linux/ipv6.h>
  19#include <net/ip6_checksum.h>
  20#include <asm/unaligned.h>
  21
  22#include <net/tcp.h>
  23
  24#include <linux/netfilter.h>
  25#include <linux/netfilter_ipv4.h>
  26#include <linux/netfilter_ipv6.h>
  27#include <net/netfilter/nf_conntrack.h>
  28#include <net/netfilter/nf_conntrack_l4proto.h>
  29#include <net/netfilter/nf_conntrack_ecache.h>
  30#include <net/netfilter/nf_conntrack_seqadj.h>
  31#include <net/netfilter/nf_conntrack_synproxy.h>
  32#include <net/netfilter/nf_log.h>
  33#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
  34#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
  35
  36/* "Be conservative in what you do,
  37    be liberal in what you accept from others."
  38    If it's non-zero, we mark only out of window RST segments as INVALID. */
  39static int nf_ct_tcp_be_liberal __read_mostly = 0;
  40
  41/* If it is set to zero, we disable picking up already established
  42   connections. */
  43static int nf_ct_tcp_loose __read_mostly = 1;
  44
  45/* Max number of the retransmitted packets without receiving an (acceptable)
  46   ACK from the destination. If this number is reached, a shorter timer
  47   will be started. */
  48static int nf_ct_tcp_max_retrans __read_mostly = 3;
  49
  50  /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
  51     closely.  They're more complex. --RR */
  52
  53static const char *const tcp_conntrack_names[] = {
  54        "NONE",
  55        "SYN_SENT",
  56        "SYN_RECV",
  57        "ESTABLISHED",
  58        "FIN_WAIT",
  59        "CLOSE_WAIT",
  60        "LAST_ACK",
  61        "TIME_WAIT",
  62        "CLOSE",
  63        "SYN_SENT2",
  64};
  65
  66#define SECS * HZ
  67#define MINS * 60 SECS
  68#define HOURS * 60 MINS
  69#define DAYS * 24 HOURS
  70
  71static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = {
  72        [TCP_CONNTRACK_SYN_SENT]        = 2 MINS,
  73        [TCP_CONNTRACK_SYN_RECV]        = 60 SECS,
  74        [TCP_CONNTRACK_ESTABLISHED]     = 5 DAYS,
  75        [TCP_CONNTRACK_FIN_WAIT]        = 2 MINS,
  76        [TCP_CONNTRACK_CLOSE_WAIT]      = 60 SECS,
  77        [TCP_CONNTRACK_LAST_ACK]        = 30 SECS,
  78        [TCP_CONNTRACK_TIME_WAIT]       = 2 MINS,
  79        [TCP_CONNTRACK_CLOSE]           = 10 SECS,
  80        [TCP_CONNTRACK_SYN_SENT2]       = 2 MINS,
  81/* RFC1122 says the R2 limit should be at least 100 seconds.
  82   Linux uses 15 packets as limit, which corresponds
  83   to ~13-30min depending on RTO. */
  84        [TCP_CONNTRACK_RETRANS]         = 5 MINS,
  85        [TCP_CONNTRACK_UNACK]           = 5 MINS,
  86};
  87
  88#define sNO TCP_CONNTRACK_NONE
  89#define sSS TCP_CONNTRACK_SYN_SENT
  90#define sSR TCP_CONNTRACK_SYN_RECV
  91#define sES TCP_CONNTRACK_ESTABLISHED
  92#define sFW TCP_CONNTRACK_FIN_WAIT
  93#define sCW TCP_CONNTRACK_CLOSE_WAIT
  94#define sLA TCP_CONNTRACK_LAST_ACK
  95#define sTW TCP_CONNTRACK_TIME_WAIT
  96#define sCL TCP_CONNTRACK_CLOSE
  97#define sS2 TCP_CONNTRACK_SYN_SENT2
  98#define sIV TCP_CONNTRACK_MAX
  99#define sIG TCP_CONNTRACK_IGNORE
 100
 101/* What TCP flags are set from RST/SYN/FIN/ACK. */
 102enum tcp_bit_set {
 103        TCP_SYN_SET,
 104        TCP_SYNACK_SET,
 105        TCP_FIN_SET,
 106        TCP_ACK_SET,
 107        TCP_RST_SET,
 108        TCP_NONE_SET,
 109};
 110
 111/*
 112 * The TCP state transition table needs a few words...
 113 *
 114 * We are the man in the middle. All the packets go through us
 115 * but might get lost in transit to the destination.
 116 * It is assumed that the destinations can't receive segments
 117 * we haven't seen.
 118 *
 119 * The checked segment is in window, but our windows are *not*
 120 * equivalent with the ones of the sender/receiver. We always
 121 * try to guess the state of the current sender.
 122 *
 123 * The meaning of the states are:
 124 *
 125 * NONE:        initial state
 126 * SYN_SENT:    SYN-only packet seen
 127 * SYN_SENT2:   SYN-only packet seen from reply dir, simultaneous open
 128 * SYN_RECV:    SYN-ACK packet seen
 129 * ESTABLISHED: ACK packet seen
 130 * FIN_WAIT:    FIN packet seen
 131 * CLOSE_WAIT:  ACK seen (after FIN)
 132 * LAST_ACK:    FIN seen (after FIN)
 133 * TIME_WAIT:   last ACK seen
 134 * CLOSE:       closed connection (RST)
 135 *
 136 * Packets marked as IGNORED (sIG):
 137 *      if they may be either invalid or valid
 138 *      and the receiver may send back a connection
 139 *      closing RST or a SYN/ACK.
 140 *
 141 * Packets marked as INVALID (sIV):
 142 *      if we regard them as truly invalid packets
 143 */
 144static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
 145        {
 146/* ORIGINAL */
 147/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 148/*syn*/    { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
 149/*
 150 *      sNO -> sSS      Initialize a new connection
 151 *      sSS -> sSS      Retransmitted SYN
 152 *      sS2 -> sS2      Late retransmitted SYN
 153 *      sSR -> sIG
 154 *      sES -> sIG      Error: SYNs in window outside the SYN_SENT state
 155 *                      are errors. Receiver will reply with RST
 156 *                      and close the connection.
 157 *                      Or we are not in sync and hold a dead connection.
 158 *      sFW -> sIG
 159 *      sCW -> sIG
 160 *      sLA -> sIG
 161 *      sTW -> sSS      Reopened connection (RFC 1122).
 162 *      sCL -> sSS
 163 */
 164/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 165/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
 166/*
 167 *      sNO -> sIV      Too late and no reason to do anything
 168 *      sSS -> sIV      Client can't send SYN and then SYN/ACK
 169 *      sS2 -> sSR      SYN/ACK sent to SYN2 in simultaneous open
 170 *      sSR -> sSR      Late retransmitted SYN/ACK in simultaneous open
 171 *      sES -> sIV      Invalid SYN/ACK packets sent by the client
 172 *      sFW -> sIV
 173 *      sCW -> sIV
 174 *      sLA -> sIV
 175 *      sTW -> sIV
 176 *      sCL -> sIV
 177 */
 178/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 179/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
 180/*
 181 *      sNO -> sIV      Too late and no reason to do anything...
 182 *      sSS -> sIV      Client migth not send FIN in this state:
 183 *                      we enforce waiting for a SYN/ACK reply first.
 184 *      sS2 -> sIV
 185 *      sSR -> sFW      Close started.
 186 *      sES -> sFW
 187 *      sFW -> sLA      FIN seen in both directions, waiting for
 188 *                      the last ACK.
 189 *                      Migth be a retransmitted FIN as well...
 190 *      sCW -> sLA
 191 *      sLA -> sLA      Retransmitted FIN. Remain in the same state.
 192 *      sTW -> sTW
 193 *      sCL -> sCL
 194 */
 195/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 196/*ack*/    { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
 197/*
 198 *      sNO -> sES      Assumed.
 199 *      sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
 200 *      sS2 -> sIV
 201 *      sSR -> sES      Established state is reached.
 202 *      sES -> sES      :-)
 203 *      sFW -> sCW      Normal close request answered by ACK.
 204 *      sCW -> sCW
 205 *      sLA -> sTW      Last ACK detected (RFC5961 challenged)
 206 *      sTW -> sTW      Retransmitted last ACK. Remain in the same state.
 207 *      sCL -> sCL
 208 */
 209/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 210/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
 211/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
 212        },
 213        {
 214/* REPLY */
 215/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 216/*syn*/    { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
 217/*
 218 *      sNO -> sIV      Never reached.
 219 *      sSS -> sS2      Simultaneous open
 220 *      sS2 -> sS2      Retransmitted simultaneous SYN
 221 *      sSR -> sIV      Invalid SYN packets sent by the server
 222 *      sES -> sIV
 223 *      sFW -> sIV
 224 *      sCW -> sIV
 225 *      sLA -> sIV
 226 *      sTW -> sSS      Reopened connection, but server may have switched role
 227 *      sCL -> sIV
 228 */
 229/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 230/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
 231/*
 232 *      sSS -> sSR      Standard open.
 233 *      sS2 -> sSR      Simultaneous open
 234 *      sSR -> sIG      Retransmitted SYN/ACK, ignore it.
 235 *      sES -> sIG      Late retransmitted SYN/ACK?
 236 *      sFW -> sIG      Might be SYN/ACK answering ignored SYN
 237 *      sCW -> sIG
 238 *      sLA -> sIG
 239 *      sTW -> sIG
 240 *      sCL -> sIG
 241 */
 242/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 243/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
 244/*
 245 *      sSS -> sIV      Server might not send FIN in this state.
 246 *      sS2 -> sIV
 247 *      sSR -> sFW      Close started.
 248 *      sES -> sFW
 249 *      sFW -> sLA      FIN seen in both directions.
 250 *      sCW -> sLA
 251 *      sLA -> sLA      Retransmitted FIN.
 252 *      sTW -> sTW
 253 *      sCL -> sCL
 254 */
 255/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 256/*ack*/    { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
 257/*
 258 *      sSS -> sIG      Might be a half-open connection.
 259 *      sS2 -> sIG
 260 *      sSR -> sSR      Might answer late resent SYN.
 261 *      sES -> sES      :-)
 262 *      sFW -> sCW      Normal close request answered by ACK.
 263 *      sCW -> sCW
 264 *      sLA -> sTW      Last ACK detected (RFC5961 challenged)
 265 *      sTW -> sTW      Retransmitted last ACK.
 266 *      sCL -> sCL
 267 */
 268/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 269/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
 270/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
 271        }
 272};
 273
 274static inline struct nf_tcp_net *tcp_pernet(struct net *net)
 275{
 276        return &net->ct.nf_ct_proto.tcp;
 277}
 278
 279static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 280                             struct net *net, struct nf_conntrack_tuple *tuple)
 281{
 282        const struct tcphdr *hp;
 283        struct tcphdr _hdr;
 284
 285        /* Actually only need first 4 bytes to get ports. */
 286        hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
 287        if (hp == NULL)
 288                return false;
 289
 290        tuple->src.u.tcp.port = hp->source;
 291        tuple->dst.u.tcp.port = hp->dest;
 292
 293        return true;
 294}
 295
 296static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
 297                             const struct nf_conntrack_tuple *orig)
 298{
 299        tuple->src.u.tcp.port = orig->dst.u.tcp.port;
 300        tuple->dst.u.tcp.port = orig->src.u.tcp.port;
 301        return true;
 302}
 303
 304#ifdef CONFIG_NF_CONNTRACK_PROCFS
 305/* Print out the private part of the conntrack. */
 306static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 307{
 308        if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
 309                return;
 310
 311        seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
 312}
 313#endif
 314
 315static unsigned int get_conntrack_index(const struct tcphdr *tcph)
 316{
 317        if (tcph->rst) return TCP_RST_SET;
 318        else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
 319        else if (tcph->fin) return TCP_FIN_SET;
 320        else if (tcph->ack) return TCP_ACK_SET;
 321        else return TCP_NONE_SET;
 322}
 323
 324/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
 325   in IP Filter' by Guido van Rooij.
 326
 327   http://www.sane.nl/events/sane2000/papers.html
 328   http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
 329
 330   The boundaries and the conditions are changed according to RFC793:
 331   the packet must intersect the window (i.e. segments may be
 332   after the right or before the left edge) and thus receivers may ACK
 333   segments after the right edge of the window.
 334
 335        td_maxend = max(sack + max(win,1)) seen in reply packets
 336        td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
 337        td_maxwin += seq + len - sender.td_maxend
 338                        if seq + len > sender.td_maxend
 339        td_end    = max(seq + len) seen in sent packets
 340
 341   I.   Upper bound for valid data:     seq <= sender.td_maxend
 342   II.  Lower bound for valid data:     seq + len >= sender.td_end - receiver.td_maxwin
 343   III. Upper bound for valid (s)ack:   sack <= receiver.td_end
 344   IV.  Lower bound for valid (s)ack:   sack >= receiver.td_end - MAXACKWINDOW
 345
 346   where sack is the highest right edge of sack block found in the packet
 347   or ack in the case of packet without SACK option.
 348
  349   The upper bound limit for a valid (s)ack is not ignored -
  350   we don't have to deal with fragments.
 351*/
 352
 353static inline __u32 segment_seq_plus_len(__u32 seq,
 354                                         size_t len,
 355                                         unsigned int dataoff,
 356                                         const struct tcphdr *tcph)
 357{
 358        /* XXX Should I use payload length field in IP/IPv6 header ?
 359         * - YK */
 360        return (seq + len - dataoff - tcph->doff*4
 361                + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
 362}
 363
 364/* Fixme: what about big packets? */
 365#define MAXACKWINCONST                  66000
 366#define MAXACKWINDOW(sender)                                            \
 367        ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin     \
 368                                              : MAXACKWINCONST)
 369
 370/*
 371 * Simplified tcp_parse_options routine from tcp_input.c
 372 */
 373static void tcp_options(const struct sk_buff *skb,
 374                        unsigned int dataoff,
 375                        const struct tcphdr *tcph,
 376                        struct ip_ct_tcp_state *state)
 377{
 378        unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
 379        const unsigned char *ptr;
 380        int length = (tcph->doff*4) - sizeof(struct tcphdr);
 381
 382        if (!length)
 383                return;
 384
 385        ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
 386                                 length, buff);
 387        BUG_ON(ptr == NULL);
 388
 389        state->td_scale =
 390        state->flags = 0;
 391
 392        while (length > 0) {
 393                int opcode=*ptr++;
 394                int opsize;
 395
 396                switch (opcode) {
 397                case TCPOPT_EOL:
 398                        return;
 399                case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
 400                        length--;
 401                        continue;
 402                default:
 403                        if (length < 2)
 404                                return;
 405                        opsize=*ptr++;
 406                        if (opsize < 2) /* "silly options" */
 407                                return;
 408                        if (opsize > length)
 409                                return; /* don't parse partial options */
 410
 411                        if (opcode == TCPOPT_SACK_PERM
 412                            && opsize == TCPOLEN_SACK_PERM)
 413                                state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
 414                        else if (opcode == TCPOPT_WINDOW
 415                                 && opsize == TCPOLEN_WINDOW) {
 416                                state->td_scale = *(u_int8_t *)ptr;
 417
 418                                if (state->td_scale > TCP_MAX_WSCALE)
 419                                        state->td_scale = TCP_MAX_WSCALE;
 420
 421                                state->flags |=
 422                                        IP_CT_TCP_FLAG_WINDOW_SCALE;
 423                        }
 424                        ptr += opsize - 2;
 425                        length -= opsize;
 426                }
 427        }
 428}
 429
 430static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
 431                     const struct tcphdr *tcph, __u32 *sack)
 432{
 433        unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
 434        const unsigned char *ptr;
 435        int length = (tcph->doff*4) - sizeof(struct tcphdr);
 436        __u32 tmp;
 437
 438        if (!length)
 439                return;
 440
 441        ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
 442                                 length, buff);
 443        BUG_ON(ptr == NULL);
 444
 445        /* Fast path for timestamp-only option */
 446        if (length == TCPOLEN_TSTAMP_ALIGNED
 447            && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
 448                                       | (TCPOPT_NOP << 16)
 449                                       | (TCPOPT_TIMESTAMP << 8)
 450                                       | TCPOLEN_TIMESTAMP))
 451                return;
 452
 453        while (length > 0) {
 454                int opcode = *ptr++;
 455                int opsize, i;
 456
 457                switch (opcode) {
 458                case TCPOPT_EOL:
 459                        return;
 460                case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
 461                        length--;
 462                        continue;
 463                default:
 464                        if (length < 2)
 465                                return;
 466                        opsize = *ptr++;
 467                        if (opsize < 2) /* "silly options" */
 468                                return;
 469                        if (opsize > length)
 470                                return; /* don't parse partial options */
 471
 472                        if (opcode == TCPOPT_SACK
 473                            && opsize >= (TCPOLEN_SACK_BASE
 474                                          + TCPOLEN_SACK_PERBLOCK)
 475                            && !((opsize - TCPOLEN_SACK_BASE)
 476                                 % TCPOLEN_SACK_PERBLOCK)) {
 477                                for (i = 0;
 478                                     i < (opsize - TCPOLEN_SACK_BASE);
 479                                     i += TCPOLEN_SACK_PERBLOCK) {
 480                                        tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
 481
 482                                        if (after(tmp, *sack))
 483                                                *sack = tmp;
 484                                }
 485                                return;
 486                        }
 487                        ptr += opsize - 2;
 488                        length -= opsize;
 489                }
 490        }
 491}
 492
 493static bool tcp_in_window(const struct nf_conn *ct,
 494                          struct ip_ct_tcp *state,
 495                          enum ip_conntrack_dir dir,
 496                          unsigned int index,
 497                          const struct sk_buff *skb,
 498                          unsigned int dataoff,
 499                          const struct tcphdr *tcph)
 500{
 501        struct net *net = nf_ct_net(ct);
 502        struct nf_tcp_net *tn = tcp_pernet(net);
 503        struct ip_ct_tcp_state *sender = &state->seen[dir];
 504        struct ip_ct_tcp_state *receiver = &state->seen[!dir];
 505        const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
 506        __u32 seq, ack, sack, end, win, swin;
 507        s32 receiver_offset;
 508        bool res, in_recv_win;
 509
 510        /*
 511         * Get the required data from the packet.
 512         */
 513        seq = ntohl(tcph->seq);
 514        ack = sack = ntohl(tcph->ack_seq);
 515        win = ntohs(tcph->window);
 516        end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
 517
 518        if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
 519                tcp_sack(skb, dataoff, tcph, &sack);
 520
 521        /* Take into account NAT sequence number mangling */
 522        receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
 523        ack -= receiver_offset;
 524        sack -= receiver_offset;
 525
 526        pr_debug("tcp_in_window: START\n");
 527        pr_debug("tcp_in_window: ");
 528        nf_ct_dump_tuple(tuple);
 529        pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
 530                 seq, ack, receiver_offset, sack, receiver_offset, win, end);
 531        pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
 532                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
 533                 sender->td_end, sender->td_maxend, sender->td_maxwin,
 534                 sender->td_scale,
 535                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
 536                 receiver->td_scale);
 537
 538        if (sender->td_maxwin == 0) {
 539                /*
 540                 * Initialize sender data.
 541                 */
 542                if (tcph->syn) {
 543                        /*
 544                         * SYN-ACK in reply to a SYN
 545                         * or SYN from reply direction in simultaneous open.
 546                         */
 547                        sender->td_end =
 548                        sender->td_maxend = end;
 549                        sender->td_maxwin = (win == 0 ? 1 : win);
 550
 551                        tcp_options(skb, dataoff, tcph, sender);
 552                        /*
 553                         * RFC 1323:
 554                         * Both sides must send the Window Scale option
 555                         * to enable window scaling in either direction.
 556                         */
 557                        if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
 558                              && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
 559                                sender->td_scale =
 560                                receiver->td_scale = 0;
 561                        if (!tcph->ack)
 562                                /* Simultaneous open */
 563                                return true;
 564                } else {
 565                        /*
 566                         * We are in the middle of a connection,
 567                         * its history is lost for us.
 568                         * Let's try to use the data from the packet.
 569                         */
 570                        sender->td_end = end;
 571                        swin = win << sender->td_scale;
 572                        sender->td_maxwin = (swin == 0 ? 1 : swin);
 573                        sender->td_maxend = end + sender->td_maxwin;
 574                        /*
 575                         * We haven't seen traffic in the other direction yet
 576                         * but we have to tweak window tracking to pass III
 577                         * and IV until that happens.
 578                         */
 579                        if (receiver->td_maxwin == 0)
 580                                receiver->td_end = receiver->td_maxend = sack;
 581                }
 582        } else if (((state->state == TCP_CONNTRACK_SYN_SENT
 583                     && dir == IP_CT_DIR_ORIGINAL)
 584                   || (state->state == TCP_CONNTRACK_SYN_RECV
 585                     && dir == IP_CT_DIR_REPLY))
 586                   && after(end, sender->td_end)) {
 587                /*
 588                 * RFC 793: "if a TCP is reinitialized ... then it need
 589                 * not wait at all; it must only be sure to use sequence
 590                 * numbers larger than those recently used."
 591                 */
 592                sender->td_end =
 593                sender->td_maxend = end;
 594                sender->td_maxwin = (win == 0 ? 1 : win);
 595
 596                tcp_options(skb, dataoff, tcph, sender);
 597        }
 598
 599        if (!(tcph->ack)) {
 600                /*
 601                 * If there is no ACK, just pretend it was set and OK.
 602                 */
 603                ack = sack = receiver->td_end;
 604        } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
 605                    (TCP_FLAG_ACK|TCP_FLAG_RST))
 606                   && (ack == 0)) {
 607                /*
 608                 * Broken TCP stacks, that set ACK in RST packets as well
 609                 * with zero ack value.
 610                 */
 611                ack = sack = receiver->td_end;
 612        }
 613
 614        if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
 615                /*
 616                 * RST sent answering SYN.
 617                 */
 618                seq = end = sender->td_end;
 619
 620        pr_debug("tcp_in_window: ");
 621        nf_ct_dump_tuple(tuple);
 622        pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
 623                 seq, ack, receiver_offset, sack, receiver_offset, win, end);
 624        pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
 625                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
 626                 sender->td_end, sender->td_maxend, sender->td_maxwin,
 627                 sender->td_scale,
 628                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
 629                 receiver->td_scale);
 630
 631        /* Is the ending sequence in the receive window (if available)? */
 632        in_recv_win = !receiver->td_maxwin ||
 633                      after(end, sender->td_end - receiver->td_maxwin - 1);
 634
 635        pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
 636                 before(seq, sender->td_maxend + 1),
 637                 (in_recv_win ? 1 : 0),
 638                 before(sack, receiver->td_end + 1),
 639                 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
 640
 641        if (before(seq, sender->td_maxend + 1) &&
 642            in_recv_win &&
 643            before(sack, receiver->td_end + 1) &&
 644            after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
 645                /*
 646                 * Take into account window scaling (RFC 1323).
 647                 */
 648                if (!tcph->syn)
 649                        win <<= sender->td_scale;
 650
 651                /*
 652                 * Update sender data.
 653                 */
 654                swin = win + (sack - ack);
 655                if (sender->td_maxwin < swin)
 656                        sender->td_maxwin = swin;
 657                if (after(end, sender->td_end)) {
 658                        sender->td_end = end;
 659                        sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
 660                }
 661                if (tcph->ack) {
 662                        if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
 663                                sender->td_maxack = ack;
 664                                sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
 665                        } else if (after(ack, sender->td_maxack))
 666                                sender->td_maxack = ack;
 667                }
 668
 669                /*
 670                 * Update receiver data.
 671                 */
 672                if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
 673                        receiver->td_maxwin += end - sender->td_maxend;
 674                if (after(sack + win, receiver->td_maxend - 1)) {
 675                        receiver->td_maxend = sack + win;
 676                        if (win == 0)
 677                                receiver->td_maxend++;
 678                }
 679                if (ack == receiver->td_end)
 680                        receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
 681
 682                /*
 683                 * Check retransmissions.
 684                 */
 685                if (index == TCP_ACK_SET) {
 686                        if (state->last_dir == dir
 687                            && state->last_seq == seq
 688                            && state->last_ack == ack
 689                            && state->last_end == end
 690                            && state->last_win == win)
 691                                state->retrans++;
 692                        else {
 693                                state->last_dir = dir;
 694                                state->last_seq = seq;
 695                                state->last_ack = ack;
 696                                state->last_end = end;
 697                                state->last_win = win;
 698                                state->retrans = 0;
 699                        }
 700                }
 701                res = true;
 702        } else {
 703                res = false;
 704                if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
 705                    tn->tcp_be_liberal)
 706                        res = true;
 707                if (!res) {
 708                        nf_ct_l4proto_log_invalid(skb, ct,
 709                        "%s",
 710                        before(seq, sender->td_maxend + 1) ?
 711                        in_recv_win ?
 712                        before(sack, receiver->td_end + 1) ?
 713                        after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
 714                        : "ACK is under the lower bound (possible overly delayed ACK)"
 715                        : "ACK is over the upper bound (ACKed data not seen yet)"
 716                        : "SEQ is under the lower bound (already ACKed data retransmitted)"
 717                        : "SEQ is over the upper bound (over the window of the receiver)");
 718                }
 719        }
 720
 721        pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
 722                 "receiver end=%u maxend=%u maxwin=%u\n",
 723                 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
 724                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
 725
 726        return res;
 727}
 728
 729/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
 730static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
 731                                 TCPHDR_URG) + 1] =
 732{
 733        [TCPHDR_SYN]                            = 1,
 734        [TCPHDR_SYN|TCPHDR_URG]                 = 1,
 735        [TCPHDR_SYN|TCPHDR_ACK]                 = 1,
 736        [TCPHDR_RST]                            = 1,
 737        [TCPHDR_RST|TCPHDR_ACK]                 = 1,
 738        [TCPHDR_FIN|TCPHDR_ACK]                 = 1,
 739        [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]      = 1,
 740        [TCPHDR_ACK]                            = 1,
 741        [TCPHDR_ACK|TCPHDR_URG]                 = 1,
 742};
 743
 744static void tcp_error_log(const struct sk_buff *skb, struct net *net,
 745                          u8 pf, const char *msg)
 746{
 747        nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg);
 748}
 749
 750/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
 751static int tcp_error(struct net *net, struct nf_conn *tmpl,
 752                     struct sk_buff *skb,
 753                     unsigned int dataoff,
 754                     u_int8_t pf,
 755                     unsigned int hooknum)
 756{
 757        const struct tcphdr *th;
 758        struct tcphdr _tcph;
 759        unsigned int tcplen = skb->len - dataoff;
 760        u_int8_t tcpflags;
 761
 762        /* Smaller that minimal TCP header? */
 763        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
 764        if (th == NULL) {
 765                tcp_error_log(skb, net, pf, "short packet");
 766                return -NF_ACCEPT;
 767        }
 768
 769        /* Not whole TCP header or malformed packet */
 770        if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
 771                tcp_error_log(skb, net, pf, "truncated packet");
 772                return -NF_ACCEPT;
 773        }
 774
 775        /* Checksum invalid? Ignore.
 776         * We skip checking packets on the outgoing path
 777         * because the checksum is assumed to be correct.
 778         */
 779        /* FIXME: Source route IP option packets --RR */
 780        if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 781            nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
 782                tcp_error_log(skb, net, pf, "bad checksum");
 783                return -NF_ACCEPT;
 784        }
 785
 786        /* Check TCP flags. */
 787        tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
 788        if (!tcp_valid_flags[tcpflags]) {
 789                tcp_error_log(skb, net, pf, "invalid tcp flag combination");
 790                return -NF_ACCEPT;
 791        }
 792
 793        return NF_ACCEPT;
 794}
 795
 796static unsigned int *tcp_get_timeouts(struct net *net)
 797{
 798        return tcp_pernet(net)->timeouts;
 799}
 800
 801/* Returns verdict for packet, or -1 for invalid. */
 802static int tcp_packet(struct nf_conn *ct,
 803                      const struct sk_buff *skb,
 804                      unsigned int dataoff,
 805                      enum ip_conntrack_info ctinfo,
 806                      unsigned int *timeouts)
 807{
 808        struct net *net = nf_ct_net(ct);
 809        struct nf_tcp_net *tn = tcp_pernet(net);
 810        struct nf_conntrack_tuple *tuple;
 811        enum tcp_conntrack new_state, old_state;
 812        enum ip_conntrack_dir dir;
 813        const struct tcphdr *th;
 814        struct tcphdr _tcph;
 815        unsigned long timeout;
 816        unsigned int index;
 817
 818        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
 819        BUG_ON(th == NULL);
 820
 821        spin_lock_bh(&ct->lock);
 822        old_state = ct->proto.tcp.state;
 823        dir = CTINFO2DIR(ctinfo);
 824        index = get_conntrack_index(th);
 825        new_state = tcp_conntracks[dir][index][old_state];
 826        tuple = &ct->tuplehash[dir].tuple;
 827
 828        switch (new_state) {
 829        case TCP_CONNTRACK_SYN_SENT:
 830                if (old_state < TCP_CONNTRACK_TIME_WAIT)
 831                        break;
 832                /* RFC 1122: "When a connection is closed actively,
 833                 * it MUST linger in TIME-WAIT state for a time 2xMSL
 834                 * (Maximum Segment Lifetime). However, it MAY accept
 835                 * a new SYN from the remote TCP to reopen the connection
 836                 * directly from TIME-WAIT state, if..."
 837                 * We ignore the conditions because we are in the
 838                 * TIME-WAIT state anyway.
 839                 *
 840                 * Handle aborted connections: we and the server
 841                 * think there is an existing connection but the client
 842                 * aborts it and starts a new one.
 843                 */
 844                if (((ct->proto.tcp.seen[dir].flags
 845                      | ct->proto.tcp.seen[!dir].flags)
 846                     & IP_CT_TCP_FLAG_CLOSE_INIT)
 847                    || (ct->proto.tcp.last_dir == dir
 848                        && ct->proto.tcp.last_index == TCP_RST_SET)) {
 849                        /* Attempt to reopen a closed/aborted connection.
 850                         * Delete this connection and look up again. */
 851                        spin_unlock_bh(&ct->lock);
 852
 853                        /* Only repeat if we can actually remove the timer.
 854                         * Destruction may already be in progress in process
 855                         * context and we must give it a chance to terminate.
 856                         */
 857                        if (nf_ct_kill(ct))
 858                                return -NF_REPEAT;
 859                        return NF_DROP;
 860                }
 861                /* Fall through */
 862        case TCP_CONNTRACK_IGNORE:
 863                /* Ignored packets:
 864                 *
 865                 * Our connection entry may be out of sync, so ignore
 866                 * packets which may signal the real connection between
 867                 * the client and the server.
 868                 *
 869                 * a) SYN in ORIGINAL
 870                 * b) SYN/ACK in REPLY
 871                 * c) ACK in reply direction after initial SYN in original.
 872                 *
 873                 * If the ignored packet is invalid, the receiver will send
 874                 * a RST we'll catch below.
 875                 */
 876                if (index == TCP_SYNACK_SET
 877                    && ct->proto.tcp.last_index == TCP_SYN_SET
 878                    && ct->proto.tcp.last_dir != dir
 879                    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
 880                        /* b) This SYN/ACK acknowledges a SYN that we earlier
 881                         * ignored as invalid. This means that the client and
 882                         * the server are both in sync, while the firewall is
 883                         * not. We get in sync from the previously annotated
 884                         * values.
 885                         */
 886                        old_state = TCP_CONNTRACK_SYN_SENT;
 887                        new_state = TCP_CONNTRACK_SYN_RECV;
 888                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
 889                                ct->proto.tcp.last_end;
 890                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
 891                                ct->proto.tcp.last_end;
 892                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
 893                                ct->proto.tcp.last_win == 0 ?
 894                                        1 : ct->proto.tcp.last_win;
 895                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
 896                                ct->proto.tcp.last_wscale;
 897                        ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
 898                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
 899                                ct->proto.tcp.last_flags;
 900                        memset(&ct->proto.tcp.seen[dir], 0,
 901                               sizeof(struct ip_ct_tcp_state));
 902                        break;
 903                }
 904                ct->proto.tcp.last_index = index;
 905                ct->proto.tcp.last_dir = dir;
 906                ct->proto.tcp.last_seq = ntohl(th->seq);
 907                ct->proto.tcp.last_end =
 908                    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
 909                ct->proto.tcp.last_win = ntohs(th->window);
 910
 911                /* a) This is a SYN in ORIGINAL. The client and the server
 912                 * may be in sync but we are not. In that case, we annotate
 913                 * the TCP options and let the packet go through. If it is a
 914                 * valid SYN packet, the server will reply with a SYN/ACK, and
 915                 * then we'll get in sync. Otherwise, the server potentially
 916                 * responds with a challenge ACK if implementing RFC5961.
 917                 */
 918                if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
 919                        struct ip_ct_tcp_state seen = {};
 920
 921                        ct->proto.tcp.last_flags =
 922                        ct->proto.tcp.last_wscale = 0;
 923                        tcp_options(skb, dataoff, th, &seen);
 924                        if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
 925                                ct->proto.tcp.last_flags |=
 926                                        IP_CT_TCP_FLAG_WINDOW_SCALE;
 927                                ct->proto.tcp.last_wscale = seen.td_scale;
 928                        }
 929                        if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
 930                                ct->proto.tcp.last_flags |=
 931                                        IP_CT_TCP_FLAG_SACK_PERM;
 932                        }
 933                        /* Mark the potential for RFC5961 challenge ACK,
 934                         * this pose a special problem for LAST_ACK state
 935                         * as ACK is intrepretated as ACKing last FIN.
 936                         */
 937                        if (old_state == TCP_CONNTRACK_LAST_ACK)
 938                                ct->proto.tcp.last_flags |=
 939                                        IP_CT_EXP_CHALLENGE_ACK;
 940                }
 941                spin_unlock_bh(&ct->lock);
 942                nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in "
 943                                          "state %s ", tcp_conntrack_names[old_state]);
 944                return NF_ACCEPT;
 945        case TCP_CONNTRACK_MAX:
 946                /* Special case for SYN proxy: when the SYN to the server or
 947                 * the SYN/ACK from the server is lost, the client may transmit
 948                 * a keep-alive packet while in SYN_SENT state. This needs to
 949                 * be associated with the original conntrack entry in order to
 950                 * generate a new SYN with the correct sequence number.
 951                 */
 952                if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
 953                    index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
 954                    ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
 955                    ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
 956                        pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
 957                        spin_unlock_bh(&ct->lock);
 958                        return NF_ACCEPT;
 959                }
 960
 961                /* Invalid packet */
 962                pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
 963                         dir, get_conntrack_index(th), old_state);
 964                spin_unlock_bh(&ct->lock);
 965                nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
 966                return -NF_ACCEPT;
 967        case TCP_CONNTRACK_TIME_WAIT:
 968                /* RFC5961 compliance cause stack to send "challenge-ACK"
 969                 * e.g. in response to spurious SYNs.  Conntrack MUST
 970                 * not believe this ACK is acking last FIN.
 971                 */
 972                if (old_state == TCP_CONNTRACK_LAST_ACK &&
 973                    index == TCP_ACK_SET &&
 974                    ct->proto.tcp.last_dir != dir &&
 975                    ct->proto.tcp.last_index == TCP_SYN_SET &&
 976                    (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) {
 977                        /* Detected RFC5961 challenge ACK */
 978                        ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
 979                        spin_unlock_bh(&ct->lock);
 980                        nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored");
 981                        return NF_ACCEPT; /* Don't change state */
 982                }
 983                break;
 984        case TCP_CONNTRACK_SYN_SENT2:
 985                /* tcp_conntracks table is not smart enough to handle
 986                 * simultaneous open.
 987                 */
 988                ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN;
 989                break;
 990        case TCP_CONNTRACK_SYN_RECV:
 991                if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET &&
 992                    ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN)
 993                        new_state = TCP_CONNTRACK_ESTABLISHED;
 994                break;
 995        case TCP_CONNTRACK_CLOSE:
 996                if (index == TCP_RST_SET
 997                    && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
 998                    && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
 999                        /* Invalid RST  */
1000                        spin_unlock_bh(&ct->lock);
1001                        nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
1002                        return -NF_ACCEPT;
1003                }
1004                if (index == TCP_RST_SET
1005                    && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
1006                         && ct->proto.tcp.last_index == TCP_SYN_SET)
1007                        || (!test_bit(IPS_ASSURED_BIT, &ct->status)
1008                            && ct->proto.tcp.last_index == TCP_ACK_SET))
1009                    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
1010                        /* RST sent to invalid SYN or ACK we had let through
1011                         * at a) and c) above:
1012                         *
1013                         * a) SYN was in window then
1014                         * c) we hold a half-open connection.
1015                         *
1016                         * Delete our connection entry.
1017                         * We skip window checking, because packet might ACK
1018                         * segments we ignored. */
1019                        goto in_window;
1020                }
1021                /* Just fall through */
1022        default:
1023                /* Keep compilers happy. */
1024                break;
1025        }
1026
1027        if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1028                           skb, dataoff, th)) {
1029                spin_unlock_bh(&ct->lock);
1030                return -NF_ACCEPT;
1031        }
1032     in_window:
1033        /* From now on we have got in-window packets */
1034        ct->proto.tcp.last_index = index;
1035        ct->proto.tcp.last_dir = dir;
1036
1037        pr_debug("tcp_conntracks: ");
1038        nf_ct_dump_tuple(tuple);
1039        pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1040                 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1041                 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1042                 old_state, new_state);
1043
1044        ct->proto.tcp.state = new_state;
1045        if (old_state != new_state
1046            && new_state == TCP_CONNTRACK_FIN_WAIT)
1047                ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1048
1049        if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1050            timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1051                timeout = timeouts[TCP_CONNTRACK_RETRANS];
1052        else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1053                 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1054                 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1055                timeout = timeouts[TCP_CONNTRACK_UNACK];
1056        else if (ct->proto.tcp.last_win == 0 &&
1057                 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1058                timeout = timeouts[TCP_CONNTRACK_RETRANS];
1059        else
1060                timeout = timeouts[new_state];
1061        spin_unlock_bh(&ct->lock);
1062
1063        if (new_state != old_state)
1064                nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1065
1066        if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1067                /* If only reply is a RST, we can consider ourselves not to
1068                   have an established connection: this is a fairly common
1069                   problem case, so we can delete the conntrack
1070                   immediately.  --RR */
1071                if (th->rst) {
1072                        nf_ct_kill_acct(ct, ctinfo, skb);
1073                        return NF_ACCEPT;
1074                }
1075                /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
1076                 * pickup with loose=1. Avoid large ESTABLISHED timeout.
1077                 */
1078                if (new_state == TCP_CONNTRACK_ESTABLISHED &&
1079                    timeout > timeouts[TCP_CONNTRACK_UNACK])
1080                        timeout = timeouts[TCP_CONNTRACK_UNACK];
1081        } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1082                   && (old_state == TCP_CONNTRACK_SYN_RECV
1083                       || old_state == TCP_CONNTRACK_ESTABLISHED)
1084                   && new_state == TCP_CONNTRACK_ESTABLISHED) {
1085                /* Set ASSURED if we see see valid ack in ESTABLISHED
1086                   after SYN_RECV or a valid answer for a picked up
1087                   connection. */
1088                set_bit(IPS_ASSURED_BIT, &ct->status);
1089                nf_conntrack_event_cache(IPCT_ASSURED, ct);
1090        }
1091        nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1092
1093        return NF_ACCEPT;
1094}
1095
1096/* Called when a new connection for this protocol found. */
1097static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1098                    unsigned int dataoff, unsigned int *timeouts)
1099{
1100        enum tcp_conntrack new_state;
1101        const struct tcphdr *th;
1102        struct tcphdr _tcph;
1103        struct net *net = nf_ct_net(ct);
1104        struct nf_tcp_net *tn = tcp_pernet(net);
1105        const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1106        const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1107
1108        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1109        BUG_ON(th == NULL);
1110
1111        /* Don't need lock here: this conntrack not in circulation yet */
1112        new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1113
1114        /* Invalid: delete conntrack */
1115        if (new_state >= TCP_CONNTRACK_MAX) {
1116                pr_debug("nf_ct_tcp: invalid new deleting.\n");
1117                return false;
1118        }
1119
1120        if (new_state == TCP_CONNTRACK_SYN_SENT) {
1121                memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1122                /* SYN packet */
1123                ct->proto.tcp.seen[0].td_end =
1124                        segment_seq_plus_len(ntohl(th->seq), skb->len,
1125                                             dataoff, th);
1126                ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1127                if (ct->proto.tcp.seen[0].td_maxwin == 0)
1128                        ct->proto.tcp.seen[0].td_maxwin = 1;
1129                ct->proto.tcp.seen[0].td_maxend =
1130                        ct->proto.tcp.seen[0].td_end;
1131
1132                tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1133        } else if (tn->tcp_loose == 0) {
1134                /* Don't try to pick up connections. */
1135                return false;
1136        } else {
1137                memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1138                /*
1139                 * We are in the middle of a connection,
1140                 * its history is lost for us.
1141                 * Let's try to use the data from the packet.
1142                 */
1143                ct->proto.tcp.seen[0].td_end =
1144                        segment_seq_plus_len(ntohl(th->seq), skb->len,
1145                                             dataoff, th);
1146                ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1147                if (ct->proto.tcp.seen[0].td_maxwin == 0)
1148                        ct->proto.tcp.seen[0].td_maxwin = 1;
1149                ct->proto.tcp.seen[0].td_maxend =
1150                        ct->proto.tcp.seen[0].td_end +
1151                        ct->proto.tcp.seen[0].td_maxwin;
1152
1153                /* We assume SACK and liberal window checking to handle
1154                 * window scaling */
1155                ct->proto.tcp.seen[0].flags =
1156                ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1157                                              IP_CT_TCP_FLAG_BE_LIBERAL;
1158        }
1159
1160        /* tcp_packet will set them */
1161        ct->proto.tcp.last_index = TCP_NONE_SET;
1162
1163        pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1164                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1165                 sender->td_end, sender->td_maxend, sender->td_maxwin,
1166                 sender->td_scale,
1167                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1168                 receiver->td_scale);
1169        return true;
1170}
1171
1172static bool tcp_can_early_drop(const struct nf_conn *ct)
1173{
1174        switch (ct->proto.tcp.state) {
1175        case TCP_CONNTRACK_FIN_WAIT:
1176        case TCP_CONNTRACK_LAST_ACK:
1177        case TCP_CONNTRACK_TIME_WAIT:
1178        case TCP_CONNTRACK_CLOSE:
1179        case TCP_CONNTRACK_CLOSE_WAIT:
1180                return true;
1181        default:
1182                break;
1183        }
1184
1185        return false;
1186}
1187
1188#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1189
1190#include <linux/netfilter/nfnetlink.h>
1191#include <linux/netfilter/nfnetlink_conntrack.h>
1192
1193static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1194                         struct nf_conn *ct)
1195{
1196        struct nlattr *nest_parms;
1197        struct nf_ct_tcp_flags tmp = {};
1198
1199        spin_lock_bh(&ct->lock);
1200        nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1201        if (!nest_parms)
1202                goto nla_put_failure;
1203
1204        if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1205            nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1206                       ct->proto.tcp.seen[0].td_scale) ||
1207            nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1208                       ct->proto.tcp.seen[1].td_scale))
1209                goto nla_put_failure;
1210
1211        tmp.flags = ct->proto.tcp.seen[0].flags;
1212        if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1213                    sizeof(struct nf_ct_tcp_flags), &tmp))
1214                goto nla_put_failure;
1215
1216        tmp.flags = ct->proto.tcp.seen[1].flags;
1217        if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1218                    sizeof(struct nf_ct_tcp_flags), &tmp))
1219                goto nla_put_failure;
1220        spin_unlock_bh(&ct->lock);
1221
1222        nla_nest_end(skb, nest_parms);
1223
1224        return 0;
1225
1226nla_put_failure:
1227        spin_unlock_bh(&ct->lock);
1228        return -1;
1229}
1230
1231static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1232        [CTA_PROTOINFO_TCP_STATE]           = { .type = NLA_U8 },
1233        [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1234        [CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
1235        [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
1236        [CTA_PROTOINFO_TCP_FLAGS_REPLY]     = { .len =  sizeof(struct nf_ct_tcp_flags) },
1237};
1238
/*
 * Space needed for what tcp_to_nlattr() emits: the CTA_PROTOINFO_TCP nest
 * header, three u8 attributes (state and both window scale factors) and
 * two struct nf_ct_tcp_flags attributes.
 *
 * The payload of the flag attributes is sizeof(struct nf_ct_tcp_flags);
 * the former sizeof(sizeof(...)) evaluated to sizeof(size_t) instead.
 * NOTE(review): the nest header may also be accounted for by the caller;
 * counting it here only makes the estimate slightly larger.
 */
#define TCP_NLATTR_SIZE	( \
	NLA_ALIGN(NLA_HDRLEN) + \
	NLA_ALIGN(NLA_HDRLEN + 1) + \
	NLA_ALIGN(NLA_HDRLEN + 1) + \
	NLA_ALIGN(NLA_HDRLEN + 1) + \
	NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)) + \
	NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)))
1244
1245static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1246{
1247        struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1248        struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1249        int err;
1250
1251        /* updates could not contain anything about the private
1252         * protocol info, in that case skip the parsing */
1253        if (!pattr)
1254                return 0;
1255
1256        err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr,
1257                               tcp_nla_policy, NULL);
1258        if (err < 0)
1259                return err;
1260
1261        if (tb[CTA_PROTOINFO_TCP_STATE] &&
1262            nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1263                return -EINVAL;
1264
1265        spin_lock_bh(&ct->lock);
1266        if (tb[CTA_PROTOINFO_TCP_STATE])
1267                ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1268
1269        if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1270                struct nf_ct_tcp_flags *attr =
1271                        nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1272                ct->proto.tcp.seen[0].flags &= ~attr->mask;
1273                ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1274        }
1275
1276        if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1277                struct nf_ct_tcp_flags *attr =
1278                        nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1279                ct->proto.tcp.seen[1].flags &= ~attr->mask;
1280                ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1281        }
1282
1283        if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1284            tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1285            ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1286            ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1287                ct->proto.tcp.seen[0].td_scale =
1288                        nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1289                ct->proto.tcp.seen[1].td_scale =
1290                        nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1291        }
1292        spin_unlock_bh(&ct->lock);
1293
1294        return 0;
1295}
1296
1297static unsigned int tcp_nlattr_tuple_size(void)
1298{
1299        static unsigned int size __read_mostly;
1300
1301        if (!size)
1302                size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1303
1304        return size;
1305}
1306#endif
1307
1308#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1309
1310#include <linux/netfilter/nfnetlink.h>
1311#include <linux/netfilter/nfnetlink_cttimeout.h>
1312
1313static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1314                                     struct net *net, void *data)
1315{
1316        unsigned int *timeouts = data;
1317        struct nf_tcp_net *tn = tcp_pernet(net);
1318        int i;
1319
1320        /* set default TCP timeouts. */
1321        for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1322                timeouts[i] = tn->timeouts[i];
1323
1324        if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1325                timeouts[TCP_CONNTRACK_SYN_SENT] =
1326                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1327        }
1328        if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1329                timeouts[TCP_CONNTRACK_SYN_RECV] =
1330                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1331        }
1332        if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1333                timeouts[TCP_CONNTRACK_ESTABLISHED] =
1334                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1335        }
1336        if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1337                timeouts[TCP_CONNTRACK_FIN_WAIT] =
1338                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1339        }
1340        if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1341                timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1342                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1343        }
1344        if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1345                timeouts[TCP_CONNTRACK_LAST_ACK] =
1346                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1347        }
1348        if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1349                timeouts[TCP_CONNTRACK_TIME_WAIT] =
1350                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1351        }
1352        if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1353                timeouts[TCP_CONNTRACK_CLOSE] =
1354                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1355        }
1356        if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1357                timeouts[TCP_CONNTRACK_SYN_SENT2] =
1358                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1359        }
1360        if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1361                timeouts[TCP_CONNTRACK_RETRANS] =
1362                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1363        }
1364        if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1365                timeouts[TCP_CONNTRACK_UNACK] =
1366                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1367        }
1368        return 0;
1369}
1370
1371static int
1372tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1373{
1374        const unsigned int *timeouts = data;
1375
1376        if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1377                        htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1378            nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1379                         htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1380            nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1381                         htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1382            nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1383                         htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1384            nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1385                         htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1386            nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1387                         htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1388            nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1389                         htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1390            nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1391                         htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1392            nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1393                         htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1394            nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1395                         htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1396            nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1397                         htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1398                goto nla_put_failure;
1399        return 0;
1400
1401nla_put_failure:
1402        return -ENOSPC;
1403}
1404
1405static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1406        [CTA_TIMEOUT_TCP_SYN_SENT]      = { .type = NLA_U32 },
1407        [CTA_TIMEOUT_TCP_SYN_RECV]      = { .type = NLA_U32 },
1408        [CTA_TIMEOUT_TCP_ESTABLISHED]   = { .type = NLA_U32 },
1409        [CTA_TIMEOUT_TCP_FIN_WAIT]      = { .type = NLA_U32 },
1410        [CTA_TIMEOUT_TCP_CLOSE_WAIT]    = { .type = NLA_U32 },
1411        [CTA_TIMEOUT_TCP_LAST_ACK]      = { .type = NLA_U32 },
1412        [CTA_TIMEOUT_TCP_TIME_WAIT]     = { .type = NLA_U32 },
1413        [CTA_TIMEOUT_TCP_CLOSE]         = { .type = NLA_U32 },
1414        [CTA_TIMEOUT_TCP_SYN_SENT2]     = { .type = NLA_U32 },
1415        [CTA_TIMEOUT_TCP_RETRANS]       = { .type = NLA_U32 },
1416        [CTA_TIMEOUT_TCP_UNACK]         = { .type = NLA_U32 },
1417};
1418#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1419
1420#ifdef CONFIG_SYSCTL
1421static struct ctl_table tcp_sysctl_table[] = {
1422        {
1423                .procname       = "nf_conntrack_tcp_timeout_syn_sent",
1424                .maxlen         = sizeof(unsigned int),
1425                .mode           = 0644,
1426                .proc_handler   = proc_dointvec_jiffies,
1427        },
1428        {
1429                .procname       = "nf_conntrack_tcp_timeout_syn_recv",
1430                .maxlen         = sizeof(unsigned int),
1431                .mode           = 0644,
1432                .proc_handler   = proc_dointvec_jiffies,
1433        },
1434        {
1435                .procname       = "nf_conntrack_tcp_timeout_established",
1436                .maxlen         = sizeof(unsigned int),
1437                .mode           = 0644,
1438                .proc_handler   = proc_dointvec_jiffies,
1439        },
1440        {
1441                .procname       = "nf_conntrack_tcp_timeout_fin_wait",
1442                .maxlen         = sizeof(unsigned int),
1443                .mode           = 0644,
1444                .proc_handler   = proc_dointvec_jiffies,
1445        },
1446        {
1447                .procname       = "nf_conntrack_tcp_timeout_close_wait",
1448                .maxlen         = sizeof(unsigned int),
1449                .mode           = 0644,
1450                .proc_handler   = proc_dointvec_jiffies,
1451        },
1452        {
1453                .procname       = "nf_conntrack_tcp_timeout_last_ack",
1454                .maxlen         = sizeof(unsigned int),
1455                .mode           = 0644,
1456                .proc_handler   = proc_dointvec_jiffies,
1457        },
1458        {
1459                .procname       = "nf_conntrack_tcp_timeout_time_wait",
1460                .maxlen         = sizeof(unsigned int),
1461                .mode           = 0644,
1462                .proc_handler   = proc_dointvec_jiffies,
1463        },
1464        {
1465                .procname       = "nf_conntrack_tcp_timeout_close",
1466                .maxlen         = sizeof(unsigned int),
1467                .mode           = 0644,
1468                .proc_handler   = proc_dointvec_jiffies,
1469        },
1470        {
1471                .procname       = "nf_conntrack_tcp_timeout_max_retrans",
1472                .maxlen         = sizeof(unsigned int),
1473                .mode           = 0644,
1474                .proc_handler   = proc_dointvec_jiffies,
1475        },
1476        {
1477                .procname       = "nf_conntrack_tcp_timeout_unacknowledged",
1478                .maxlen         = sizeof(unsigned int),
1479                .mode           = 0644,
1480                .proc_handler   = proc_dointvec_jiffies,
1481        },
1482        {
1483                .procname       = "nf_conntrack_tcp_loose",
1484                .maxlen         = sizeof(unsigned int),
1485                .mode           = 0644,
1486                .proc_handler   = proc_dointvec,
1487        },
1488        {
1489                .procname       = "nf_conntrack_tcp_be_liberal",
1490                .maxlen         = sizeof(unsigned int),
1491                .mode           = 0644,
1492                .proc_handler   = proc_dointvec,
1493        },
1494        {
1495                .procname       = "nf_conntrack_tcp_max_retrans",
1496                .maxlen         = sizeof(unsigned int),
1497                .mode           = 0644,
1498                .proc_handler   = proc_dointvec,
1499        },
1500        { }
1501};
1502#endif /* CONFIG_SYSCTL */
1503
/*
 * Give @pn its own copy of tcp_sysctl_table, with every entry's .data
 * pointing at the matching field of @tn.
 *
 * Nothing is done when pn->ctl_table is already set, or when the kernel
 * is built without CONFIG_SYSCTL.
 *
 * Returns 0 on success, -ENOMEM if the copy cannot be allocated.
 */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
				    struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *table;

	/* A table was already set up for this pn; keep it. */
	if (pn->ctl_table)
		return 0;

	table = kmemdup(tcp_sysctl_table, sizeof(tcp_sysctl_table),
			GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	/* Indices below follow the entry order of tcp_sysctl_table. */
	table[0].data	= &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	table[1].data	= &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	table[2].data	= &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	table[3].data	= &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	table[4].data	= &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	table[5].data	= &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	table[6].data	= &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	table[7].data	= &tn->timeouts[TCP_CONNTRACK_CLOSE];
	table[8].data	= &tn->timeouts[TCP_CONNTRACK_RETRANS];
	table[9].data	= &tn->timeouts[TCP_CONNTRACK_UNACK];
	table[10].data	= &tn->tcp_loose;
	table[11].data	= &tn->tcp_be_liberal;
	table[12].data	= &tn->tcp_max_retrans;

	pn->ctl_table = table;
#endif
	return 0;
}
1533
1534static int tcp_init_net(struct net *net, u_int16_t proto)
1535{
1536        struct nf_tcp_net *tn = tcp_pernet(net);
1537        struct nf_proto_net *pn = &tn->pn;
1538
1539        if (!pn->users) {
1540                int i;
1541
1542                for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1543                        tn->timeouts[i] = tcp_timeouts[i];
1544
1545                tn->tcp_loose = nf_ct_tcp_loose;
1546                tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1547                tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1548        }
1549
1550        return tcp_kmemdup_sysctl_table(pn, tn);
1551}
1552
1553static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1554{
1555        return &net->ct.nf_ct_proto.tcp.pn;
1556}
1557
1558const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
1559{
1560        .l3proto                = PF_INET,
1561        .l4proto                = IPPROTO_TCP,
1562        .pkt_to_tuple           = tcp_pkt_to_tuple,
1563        .invert_tuple           = tcp_invert_tuple,
1564#ifdef CONFIG_NF_CONNTRACK_PROCFS
1565        .print_conntrack        = tcp_print_conntrack,
1566#endif
1567        .packet                 = tcp_packet,
1568        .get_timeouts           = tcp_get_timeouts,
1569        .new                    = tcp_new,
1570        .error                  = tcp_error,
1571        .can_early_drop         = tcp_can_early_drop,
1572#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1573        .to_nlattr              = tcp_to_nlattr,
1574        .from_nlattr            = nlattr_to_tcp,
1575        .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1576        .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1577        .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1578        .nlattr_size            = TCP_NLATTR_SIZE,
1579        .nla_policy             = nf_ct_port_nla_policy,
1580#endif
1581#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1582        .ctnl_timeout           = {
1583                .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1584                .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1585                .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1586                .obj_size       = sizeof(unsigned int) *
1587                                        TCP_CONNTRACK_TIMEOUT_MAX,
1588                .nla_policy     = tcp_timeout_nla_policy,
1589        },
1590#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1591        .init_net               = tcp_init_net,
1592        .get_net_proto          = tcp_get_net_proto,
1593};
1594EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1595
1596const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
1597{
1598        .l3proto                = PF_INET6,
1599        .l4proto                = IPPROTO_TCP,
1600        .pkt_to_tuple           = tcp_pkt_to_tuple,
1601        .invert_tuple           = tcp_invert_tuple,
1602#ifdef CONFIG_NF_CONNTRACK_PROCFS
1603        .print_conntrack        = tcp_print_conntrack,
1604#endif
1605        .packet                 = tcp_packet,
1606        .get_timeouts           = tcp_get_timeouts,
1607        .new                    = tcp_new,
1608        .error                  = tcp_error,
1609        .can_early_drop         = tcp_can_early_drop,
1610#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1611        .nlattr_size            = TCP_NLATTR_SIZE,
1612        .to_nlattr              = tcp_to_nlattr,
1613        .from_nlattr            = nlattr_to_tcp,
1614        .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1615        .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1616        .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1617        .nla_policy             = nf_ct_port_nla_policy,
1618#endif
1619#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1620        .ctnl_timeout           = {
1621                .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1622                .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1623                .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1624                .obj_size       = sizeof(unsigned int) *
1625                                        TCP_CONNTRACK_TIMEOUT_MAX,
1626                .nla_policy     = tcp_timeout_nla_policy,
1627        },
1628#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1629        .init_net               = tcp_init_net,
1630        .get_net_proto          = tcp_get_net_proto,
1631};
1632EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
1633