linux/net/netfilter/nf_conntrack_proto_tcp.c
<<
>>
Prefs
   1/* (C) 1999-2001 Paul `Rusty' Russell
   2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
   3 * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
   4 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 as
   8 * published by the Free Software Foundation.
   9 */
  10
  11#include <linux/types.h>
  12#include <linux/timer.h>
  13#include <linux/module.h>
  14#include <linux/in.h>
  15#include <linux/tcp.h>
  16#include <linux/spinlock.h>
  17#include <linux/skbuff.h>
  18#include <linux/ipv6.h>
  19#include <net/ip6_checksum.h>
  20#include <asm/unaligned.h>
  21
  22#include <net/tcp.h>
  23
  24#include <linux/netfilter.h>
  25#include <linux/netfilter_ipv4.h>
  26#include <linux/netfilter_ipv6.h>
  27#include <net/netfilter/nf_conntrack.h>
  28#include <net/netfilter/nf_conntrack_l4proto.h>
  29#include <net/netfilter/nf_conntrack_ecache.h>
  30#include <net/netfilter/nf_log.h>
  31#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
  32#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
  33
  34/* "Be conservative in what you do,
  35    be liberal in what you accept from others."
  36    If it's non-zero, we mark only out of window RST segments as INVALID. */
  37static int nf_ct_tcp_be_liberal __read_mostly = 0;
  38
  39/* If it is set to zero, we disable picking up already established
  40   connections. */
  41static int nf_ct_tcp_loose __read_mostly = 1;
  42
  43/* Max number of the retransmitted packets without receiving an (acceptable)
  44   ACK from the destination. If this number is reached, a shorter timer
  45   will be started. */
  46static int nf_ct_tcp_max_retrans __read_mostly = 3;
  47
  48  /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
  49     closely.  They're more complex. --RR */
  50
  51static const char *const tcp_conntrack_names[] = {
  52        "NONE",
  53        "SYN_SENT",
  54        "SYN_RECV",
  55        "ESTABLISHED",
  56        "FIN_WAIT",
  57        "CLOSE_WAIT",
  58        "LAST_ACK",
  59        "TIME_WAIT",
  60        "CLOSE",
  61        "SYN_SENT2",
  62};
  63
/*
 * Postfix unit macros: they are written after a number, so that e.g.
 * "2 MINS" expands to "2 * 60 * HZ".  Each macro begins with a bare '*',
 * so it is only meaningful directly after a single operand.
 */
  64#define SECS * HZ
  65#define MINS * 60 SECS
  66#define HOURS * 60 MINS
  67#define DAYS * 24 HOURS
  68
/*
 * Timeout values per conntrack TCP state (plus the RETRANS and UNACK
 * entries), expressed with the unit macros above.
 */
  69static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
  70        [TCP_CONNTRACK_SYN_SENT]        = 2 MINS,
  71        [TCP_CONNTRACK_SYN_RECV]        = 60 SECS,
  72        [TCP_CONNTRACK_ESTABLISHED]     = 5 DAYS,
  73        [TCP_CONNTRACK_FIN_WAIT]        = 2 MINS,
  74        [TCP_CONNTRACK_CLOSE_WAIT]      = 60 SECS,
  75        [TCP_CONNTRACK_LAST_ACK]        = 30 SECS,
  76        [TCP_CONNTRACK_TIME_WAIT]       = 2 MINS,
  77        [TCP_CONNTRACK_CLOSE]           = 10 SECS,
  78        [TCP_CONNTRACK_SYN_SENT2]       = 2 MINS,
  79/* RFC1122 says the R2 limit should be at least 100 seconds.
  80   Linux uses 15 packets as limit, which corresponds
  81   to ~13-30min depending on RTO. */
  82        [TCP_CONNTRACK_RETRANS]         = 5 MINS,
  83        [TCP_CONNTRACK_UNACK]           = 5 MINS,
  84};
  85
/*
 * Three-letter shorthands for the conntrack TCP states, used to keep the
 * rows of the state transition table readable.  Two of them are not
 * connection states but verdicts: sIV (TCP_CONNTRACK_MAX) marks a packet
 * as invalid and sIG (TCP_CONNTRACK_IGNORE) marks a packet as ignored.
 */
  86#define sNO TCP_CONNTRACK_NONE
  87#define sSS TCP_CONNTRACK_SYN_SENT
  88#define sSR TCP_CONNTRACK_SYN_RECV
  89#define sES TCP_CONNTRACK_ESTABLISHED
  90#define sFW TCP_CONNTRACK_FIN_WAIT
  91#define sCW TCP_CONNTRACK_CLOSE_WAIT
  92#define sLA TCP_CONNTRACK_LAST_ACK
  93#define sTW TCP_CONNTRACK_TIME_WAIT
  94#define sCL TCP_CONNTRACK_CLOSE
  95#define sS2 TCP_CONNTRACK_SYN_SENT2
  96#define sIV TCP_CONNTRACK_MAX
  97#define sIG TCP_CONNTRACK_IGNORE
  98
  99/* What TCP flags are set from RST/SYN/FIN/ACK. */
 100enum tcp_bit_set {
 101        TCP_SYN_SET,
 102        TCP_SYNACK_SET,
 103        TCP_FIN_SET,
 104        TCP_ACK_SET,
 105        TCP_RST_SET,
 106        TCP_NONE_SET,
 107};
 108
 109/*
 110 * The TCP state transition table needs a few words...
 111 *
 112 * We are the man in the middle. All the packets go through us
 113 * but might get lost in transit to the destination.
 114 * It is assumed that the destinations can't receive segments
 115 * we haven't seen.
 116 *
 117 * The checked segment is in window, but our windows are *not*
 118 * equivalent with the ones of the sender/receiver. We always
 119 * try to guess the state of the current sender.
 120 *
 121 * The meaning of the states are:
 122 *
 123 * NONE:        initial state
 124 * SYN_SENT:    SYN-only packet seen
 125 * SYN_SENT2:   SYN-only packet seen from reply dir, simultaneous open
 126 * SYN_RECV:    SYN-ACK packet seen
 127 * ESTABLISHED: ACK packet seen
 128 * FIN_WAIT:    FIN packet seen
 129 * CLOSE_WAIT:  ACK seen (after FIN)
 130 * LAST_ACK:    FIN seen (after FIN)
 131 * TIME_WAIT:   last ACK seen
 132 * CLOSE:       closed connection (RST)
 133 *
 134 * Packets marked as IGNORED (sIG):
 135 *      if they may be either invalid or valid
 136 *      and the receiver may send back a connection
 137 *      closing RST or a SYN/ACK.
 138 *
 139 * Packets marked as INVALID (sIV):
 140 *      if we regard them as truly invalid packets
 141 */
/*
 * Layout: tcp_conntracks[direction][flag class][current state].
 * The first dimension holds one sub-table per packet direction (ORIGINAL,
 * then REPLY), the second one row per enum tcp_bit_set value (syn, synack,
 * fin, ack, rst, none) and every row has one column per current state in
 * the order given by the column header comments.  Each entry is the
 * resulting state, or one of the sIV / sIG verdicts.
 */
 142static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
 143        {
 144/* ORIGINAL */
 145/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 146/*syn*/    { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
 147/*
 148 *      sNO -> sSS      Initialize a new connection
 149 *      sSS -> sSS      Retransmitted SYN
 150 *      sS2 -> sS2      Late retransmitted SYN
 151 *      sSR -> sIG
 152 *      sES -> sIG      Error: SYNs in window outside the SYN_SENT state
 153 *                      are errors. Receiver will reply with RST
 154 *                      and close the connection.
 155 *                      Or we are not in sync and hold a dead connection.
 156 *      sFW -> sIG
 157 *      sCW -> sIG
 158 *      sLA -> sIG
 159 *      sTW -> sSS      Reopened connection (RFC 1122).
 160 *      sCL -> sSS
 161 */
 162/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 163/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
 164/*
 165 *      sNO -> sIV      Too late and no reason to do anything
 166 *      sSS -> sIV      Client can't send SYN and then SYN/ACK
 167 *      sS2 -> sSR      SYN/ACK sent to SYN2 in simultaneous open
 168 *      sSR -> sSR      Late retransmitted SYN/ACK in simultaneous open
 169 *      sES -> sIV      Invalid SYN/ACK packets sent by the client
 170 *      sFW -> sIV
 171 *      sCW -> sIV
 172 *      sLA -> sIV
 173 *      sTW -> sIV
 174 *      sCL -> sIV
 175 */
 176/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 177/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
 178/*
 179 *      sNO -> sIV      Too late and no reason to do anything...
 180 *      sSS -> sIV      Client might not send FIN in this state:
 181 *                      we enforce waiting for a SYN/ACK reply first.
 182 *      sS2 -> sIV
 183 *      sSR -> sFW      Close started.
 184 *      sES -> sFW
 185 *      sFW -> sLA      FIN seen in both directions, waiting for
 186 *                      the last ACK.
 187 *                      Might be a retransmitted FIN as well...
 188 *      sCW -> sLA
 189 *      sLA -> sLA      Retransmitted FIN. Remain in the same state.
 190 *      sTW -> sTW
 191 *      sCL -> sCL
 192 */
 193/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 194/*ack*/    { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
 195/*
 196 *      sNO -> sES      Assumed.
 197 *      sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
 198 *      sS2 -> sIV
 199 *      sSR -> sES      Established state is reached.
 200 *      sES -> sES      :-)
 201 *      sFW -> sCW      Normal close request answered by ACK.
 202 *      sCW -> sCW
 203 *      sLA -> sTW      Last ACK detected.
 204 *      sTW -> sTW      Retransmitted last ACK. Remain in the same state.
 205 *      sCL -> sCL
 206 */
 207/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 208/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
 209/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
 210        },
 211        {
 212/* REPLY */
 213/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 214/*syn*/    { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
 215/*
 216 *      sNO -> sIV      Never reached.
 217 *      sSS -> sS2      Simultaneous open
 218 *      sS2 -> sS2      Retransmitted simultaneous SYN
 219 *      sSR -> sIV      Invalid SYN packets sent by the server
 220 *      sES -> sIV
 221 *      sFW -> sIV
 222 *      sCW -> sIV
 223 *      sLA -> sIV
 224 *      sTW -> sIV      Reopened connection, but server may not do it.
 225 *      sCL -> sIV
 226 */
 227/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 228/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
 229/*
 230 *      sSS -> sSR      Standard open.
 231 *      sS2 -> sSR      Simultaneous open
 232 *      sSR -> sIG      Retransmitted SYN/ACK, ignore it.
 233 *      sES -> sIG      Late retransmitted SYN/ACK?
 234 *      sFW -> sIG      Might be SYN/ACK answering ignored SYN
 235 *      sCW -> sIG
 236 *      sLA -> sIG
 237 *      sTW -> sIG
 238 *      sCL -> sIG
 239 */
 240/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 241/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
 242/*
 243 *      sSS -> sIV      Server might not send FIN in this state.
 244 *      sS2 -> sIV
 245 *      sSR -> sFW      Close started.
 246 *      sES -> sFW
 247 *      sFW -> sLA      FIN seen in both directions.
 248 *      sCW -> sLA
 249 *      sLA -> sLA      Retransmitted FIN.
 250 *      sTW -> sTW
 251 *      sCL -> sCL
 252 */
 253/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 254/*ack*/    { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
 255/*
 256 *      sSS -> sIG      Might be a half-open connection.
 257 *      sS2 -> sIG
 258 *      sSR -> sSR      Might answer late resent SYN.
 259 *      sES -> sES      :-)
 260 *      sFW -> sCW      Normal close request answered by ACK.
 261 *      sCW -> sCW
 262 *      sLA -> sTW      Last ACK detected.
 263 *      sTW -> sTW      Retransmitted last ACK.
 264 *      sCL -> sCL
 265 */
 266/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 267/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
 268/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
 269        }
 270};
 271
 272static inline struct nf_tcp_net *tcp_pernet(struct net *net)
 273{
 274        return &net->ct.nf_ct_proto.tcp;
 275}
 276
 277static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 278                             struct nf_conntrack_tuple *tuple)
 279{
 280        const struct tcphdr *hp;
 281        struct tcphdr _hdr;
 282
 283        /* Actually only need first 8 bytes. */
 284        hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
 285        if (hp == NULL)
 286                return false;
 287
 288        tuple->src.u.tcp.port = hp->source;
 289        tuple->dst.u.tcp.port = hp->dest;
 290
 291        return true;
 292}
 293
 294static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
 295                             const struct nf_conntrack_tuple *orig)
 296{
 297        tuple->src.u.tcp.port = orig->dst.u.tcp.port;
 298        tuple->dst.u.tcp.port = orig->src.u.tcp.port;
 299        return true;
 300}
 301
 302/* Print out the per-protocol part of the tuple. */
 303static int tcp_print_tuple(struct seq_file *s,
 304                           const struct nf_conntrack_tuple *tuple)
 305{
 306        return seq_printf(s, "sport=%hu dport=%hu ",
 307                          ntohs(tuple->src.u.tcp.port),
 308                          ntohs(tuple->dst.u.tcp.port));
 309}
 310
 311/* Print out the private part of the conntrack. */
 312static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 313{
 314        enum tcp_conntrack state;
 315
 316        spin_lock_bh(&ct->lock);
 317        state = ct->proto.tcp.state;
 318        spin_unlock_bh(&ct->lock);
 319
 320        return seq_printf(s, "%s ", tcp_conntrack_names[state]);
 321}
 322
 323static unsigned int get_conntrack_index(const struct tcphdr *tcph)
 324{
 325        if (tcph->rst) return TCP_RST_SET;
 326        else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
 327        else if (tcph->fin) return TCP_FIN_SET;
 328        else if (tcph->ack) return TCP_ACK_SET;
 329        else return TCP_NONE_SET;
 330}
 331
 332/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
 333   in IP Filter' by Guido van Rooij.
 334
 335   http://www.sane.nl/events/sane2000/papers.html
 336   http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
 337
 338   The boundaries and the conditions are changed according to RFC793:
 339   the packet must intersect the window (i.e. segments may be
 340   after the right or before the left edge) and thus receivers may ACK
 341   segments after the right edge of the window.
 342
 343        td_maxend = max(sack + max(win,1)) seen in reply packets
 344        td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
 345        td_maxwin += seq + len - sender.td_maxend
 346                        if seq + len > sender.td_maxend
 347        td_end    = max(seq + len) seen in sent packets
 348
 349   I.   Upper bound for valid data:     seq <= sender.td_maxend
 350   II.  Lower bound for valid data:     seq + len >= sender.td_end - receiver.td_maxwin
 351   III. Upper bound for valid (s)ack:   sack <= receiver.td_end
 352   IV.  Lower bound for valid (s)ack:   sack >= receiver.td_end - MAXACKWINDOW
 353
 354   where sack is the highest right edge of sack block found in the packet
 355   or ack in the case of packet without SACK option.
 356
 357   The upper bound limit for a valid (s)ack is not ignored -
 358   we don't have to deal with fragments.
 359*/
 360
 361static inline __u32 segment_seq_plus_len(__u32 seq,
 362                                         size_t len,
 363                                         unsigned int dataoff,
 364                                         const struct tcphdr *tcph)
 365{
 366        /* XXX Should I use payload length field in IP/IPv6 header ?
 367         * - YK */
 368        return (seq + len - dataoff - tcph->doff*4
 369                + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
 370}
 371
 372/* Fixme: what about big packets? */
 373#define MAXACKWINCONST                  66000
 374#define MAXACKWINDOW(sender)                                            \
 375        ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin     \
 376                                              : MAXACKWINCONST)
 377
 378/*
 379 * Simplified tcp_parse_options routine from tcp_input.c
 380 */
 381static void tcp_options(const struct sk_buff *skb,
 382                        unsigned int dataoff,
 383                        const struct tcphdr *tcph,
 384                        struct ip_ct_tcp_state *state)
 385{
 386        unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
 387        const unsigned char *ptr;
 388        int length = (tcph->doff*4) - sizeof(struct tcphdr);
 389
 390        if (!length)
 391                return;
 392
 393        ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
 394                                 length, buff);
 395        BUG_ON(ptr == NULL);
 396
 397        state->td_scale =
 398        state->flags = 0;
 399
 400        while (length > 0) {
 401                int opcode=*ptr++;
 402                int opsize;
 403
 404                switch (opcode) {
 405                case TCPOPT_EOL:
 406                        return;
 407                case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
 408                        length--;
 409                        continue;
 410                default:
 411                        opsize=*ptr++;
 412                        if (opsize < 2) /* "silly options" */
 413                                return;
 414                        if (opsize > length)
 415                                return; /* don't parse partial options */
 416
 417                        if (opcode == TCPOPT_SACK_PERM
 418                            && opsize == TCPOLEN_SACK_PERM)
 419                                state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
 420                        else if (opcode == TCPOPT_WINDOW
 421                                 && opsize == TCPOLEN_WINDOW) {
 422                                state->td_scale = *(u_int8_t *)ptr;
 423
 424                                if (state->td_scale > 14) {
 425                                        /* See RFC1323 */
 426                                        state->td_scale = 14;
 427                                }
 428                                state->flags |=
 429                                        IP_CT_TCP_FLAG_WINDOW_SCALE;
 430                        }
 431                        ptr += opsize - 2;
 432                        length -= opsize;
 433                }
 434        }
 435}
 436
 437static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
 438                     const struct tcphdr *tcph, __u32 *sack)
 439{
 440        unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
 441        const unsigned char *ptr;
 442        int length = (tcph->doff*4) - sizeof(struct tcphdr);
 443        __u32 tmp;
 444
 445        if (!length)
 446                return;
 447
 448        ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
 449                                 length, buff);
 450        BUG_ON(ptr == NULL);
 451
 452        /* Fast path for timestamp-only option */
 453        if (length == TCPOLEN_TSTAMP_ALIGNED
 454            && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
 455                                       | (TCPOPT_NOP << 16)
 456                                       | (TCPOPT_TIMESTAMP << 8)
 457                                       | TCPOLEN_TIMESTAMP))
 458                return;
 459
 460        while (length > 0) {
 461                int opcode = *ptr++;
 462                int opsize, i;
 463
 464                switch (opcode) {
 465                case TCPOPT_EOL:
 466                        return;
 467                case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
 468                        length--;
 469                        continue;
 470                default:
 471                        opsize = *ptr++;
 472                        if (opsize < 2) /* "silly options" */
 473                                return;
 474                        if (opsize > length)
 475                                return; /* don't parse partial options */
 476
 477                        if (opcode == TCPOPT_SACK
 478                            && opsize >= (TCPOLEN_SACK_BASE
 479                                          + TCPOLEN_SACK_PERBLOCK)
 480                            && !((opsize - TCPOLEN_SACK_BASE)
 481                                 % TCPOLEN_SACK_PERBLOCK)) {
 482                                for (i = 0;
 483                                     i < (opsize - TCPOLEN_SACK_BASE);
 484                                     i += TCPOLEN_SACK_PERBLOCK) {
 485                                        tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
 486
 487                                        if (after(tmp, *sack))
 488                                                *sack = tmp;
 489                                }
 490                                return;
 491                        }
 492                        ptr += opsize - 2;
 493                        length -= opsize;
 494                }
 495        }
 496}
 497
 498#ifdef CONFIG_NF_NAT_NEEDED
 499static inline s16 nat_offset(const struct nf_conn *ct,
 500                             enum ip_conntrack_dir dir,
 501                             u32 seq)
 502{
 503        typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset);
 504
 505        return get_offset != NULL ? get_offset(ct, dir, seq) : 0;
 506}
 507#define NAT_OFFSET(ct, dir, seq) \
 508        (nat_offset(ct, dir, seq))
 509#else
 510#define NAT_OFFSET(ct, dir, seq)        0
 511#endif
 512
/*
 * Check a TCP segment against the tracked windows of its connection and,
 * when it fits, update the tracking data of both directions.
 *
 * @ct:      conntrack entry; used to find the net, the tuple that is dumped
 *           in debug output and the NAT sequence offset
 * @state:   TCP tracking data; seen[dir] is treated as the sender of this
 *           segment and seen[!dir] as its receiver
 * @dir:     direction the segment travels in
 * @index:   enum tcp_bit_set class of the segment; retransmissions are
 *           only counted for TCP_ACK_SET
 * @skb:     the packet
 * @dataoff: offset of the TCP header in @skb
 * @tcph:    the TCP header
 * @pf:      protocol family, passed on to nf_log_packet()
 *
 * The segment is accepted when all four bounds hold (see the pr_debug
 * line labelled I..IV below).  Returns true for an accepted segment, and
 * also for a rejected one when the sender carries
 * IP_CT_TCP_FLAG_BE_LIBERAL or tn->tcp_be_liberal is set; in that case no
 * tracking data is updated.  Returns false otherwise, after optionally
 * logging which bound failed.
 */
 513static bool tcp_in_window(const struct nf_conn *ct,
 514                          struct ip_ct_tcp *state,
 515                          enum ip_conntrack_dir dir,
 516                          unsigned int index,
 517                          const struct sk_buff *skb,
 518                          unsigned int dataoff,
 519                          const struct tcphdr *tcph,
 520                          u_int8_t pf)
 521{
 522        struct net *net = nf_ct_net(ct);
 523        struct nf_tcp_net *tn = tcp_pernet(net);
 524        struct ip_ct_tcp_state *sender = &state->seen[dir];
 525        struct ip_ct_tcp_state *receiver = &state->seen[!dir];
 526        const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
 527        __u32 seq, ack, sack, end, win, swin;
 528        s16 receiver_offset;
 529        bool res, in_recv_win;
 530
 531        /*
 532         * Get the required data from the packet.
 533         */
 534        seq = ntohl(tcph->seq);
 535        ack = sack = ntohl(tcph->ack_seq);
 536        win = ntohs(tcph->window);
 537        end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
 538
        /* sack starts out equal to ack; tcp_sack() may move it forward. */
 539        if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
 540                tcp_sack(skb, dataoff, tcph, &sack);
 541
 542        /* Take into account NAT sequence number mangling */
 543        receiver_offset = NAT_OFFSET(ct, !dir, ack - 1);
 544        ack -= receiver_offset;
 545        sack -= receiver_offset;
 546
 547        pr_debug("tcp_in_window: START\n");
 548        pr_debug("tcp_in_window: ");
 549        nf_ct_dump_tuple(tuple);
 550        pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
 551                 seq, ack, receiver_offset, sack, receiver_offset, win, end);
 552        pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
 553                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
 554                 sender->td_end, sender->td_maxend, sender->td_maxwin,
 555                 sender->td_scale,
 556                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
 557                 receiver->td_scale);
 558
        /* td_maxwin == 0 means nothing was recorded for this sender yet. */
 559        if (sender->td_maxwin == 0) {
 560                /*
 561                 * Initialize sender data.
 562                 */
 563                if (tcph->syn) {
 564                        /*
 565                         * SYN-ACK in reply to a SYN
 566                         * or SYN from reply direction in simultaneous open.
 567                         */
 568                        sender->td_end =
 569                        sender->td_maxend = end;
 570                        sender->td_maxwin = (win == 0 ? 1 : win);
 571
 572                        tcp_options(skb, dataoff, tcph, sender);
 573                        /*
 574                         * RFC 1323:
 575                         * Both sides must send the Window Scale option
 576                         * to enable window scaling in either direction.
 577                         */
 578                        if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
 579                              && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
 580                                sender->td_scale =
 581                                receiver->td_scale = 0;
 582                        if (!tcph->ack)
 583                                /* Simultaneous open */
 584                                return true;
 585                } else {
 586                        /*
 587                         * We are in the middle of a connection,
 588                         * its history is lost for us.
 589                         * Let's try to use the data from the packet.
 590                         */
 591                        sender->td_end = end;
 592                        swin = win << sender->td_scale;
 593                        sender->td_maxwin = (swin == 0 ? 1 : swin);
 594                        sender->td_maxend = end + sender->td_maxwin;
 595                        /*
 596                         * We haven't seen traffic in the other direction yet
 597                         * but we have to tweak window tracking to pass III
 598                         * and IV until that happens.
 599                         */
 600                        if (receiver->td_maxwin == 0)
 601                                receiver->td_end = receiver->td_maxend = sack;
 602                }
 603        } else if (((state->state == TCP_CONNTRACK_SYN_SENT
 604                     && dir == IP_CT_DIR_ORIGINAL)
 605                   || (state->state == TCP_CONNTRACK_SYN_RECV
 606                     && dir == IP_CT_DIR_REPLY))
 607                   && after(end, sender->td_end)) {
 608                /*
 609                 * RFC 793: "if a TCP is reinitialized ... then it need
 610                 * not wait at all; it must only be sure to use sequence
 611                 * numbers larger than those recently used."
 612                 */
 613                sender->td_end =
 614                sender->td_maxend = end;
 615                sender->td_maxwin = (win == 0 ? 1 : win);
 616
 617                tcp_options(skb, dataoff, tcph, sender);
 618        }
 619
 620        if (!(tcph->ack)) {
 621                /*
 622                 * If there is no ACK, just pretend it was set and OK.
 623                 */
 624                ack = sack = receiver->td_end;
 625        } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
 626                    (TCP_FLAG_ACK|TCP_FLAG_RST))
 627                   && (ack == 0)) {
 628                /*
 629                 * Broken TCP stacks, that set ACK in RST packets as well
 630                 * with zero ack value.
 631                 */
 632                ack = sack = receiver->td_end;
 633        }
 634
 635        if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
 636                /*
 637                 * RST sent answering SYN.
 638                 */
 639                seq = end = sender->td_end;
 640
 641        pr_debug("tcp_in_window: ");
 642        nf_ct_dump_tuple(tuple);
 643        pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
 644                 seq, ack, receiver_offset, sack, receiver_offset, win, end);
 645        pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
 646                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
 647                 sender->td_end, sender->td_maxend, sender->td_maxwin,
 648                 sender->td_scale,
 649                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
 650                 receiver->td_scale);
 651
 652        /* Is the ending sequence in the receive window (if available)? */
 653        in_recv_win = !receiver->td_maxwin ||
 654                      after(end, sender->td_end - receiver->td_maxwin - 1);
 655
 656        pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
 657                 before(seq, sender->td_maxend + 1),
 658                 (in_recv_win ? 1 : 0),
 659                 before(sack, receiver->td_end + 1),
 660                 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
 661
        /* The same four bounds (I, II, III, IV) that were just printed. */
 662        if (before(seq, sender->td_maxend + 1) &&
 663            in_recv_win &&
 664            before(sack, receiver->td_end + 1) &&
 665            after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
 666                /*
 667                 * Take into account window scaling (RFC 1323).
 668                 */
 669                if (!tcph->syn)
 670                        win <<= sender->td_scale;
 671
 672                /*
 673                 * Update sender data.
 674                 */
 675                swin = win + (sack - ack);
 676                if (sender->td_maxwin < swin)
 677                        sender->td_maxwin = swin;
 678                if (after(end, sender->td_end)) {
 679                        sender->td_end = end;
 680                        sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
 681                }
 682                if (tcph->ack) {
 683                        if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
 684                                sender->td_maxack = ack;
 685                                sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
 686                        } else if (after(ack, sender->td_maxack))
 687                                sender->td_maxack = ack;
 688                }
 689
 690                /*
 691                 * Update receiver data.
 692                 */
 693                if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
 694                        receiver->td_maxwin += end - sender->td_maxend;
 695                if (after(sack + win, receiver->td_maxend - 1)) {
 696                        receiver->td_maxend = sack + win;
 697                        if (win == 0)
 698                                receiver->td_maxend++;
 699                }
 700                if (ack == receiver->td_end)
 701                        receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
 702
 703                /*
 704                 * Check retransmissions.
 705                 */
 706                if (index == TCP_ACK_SET) {
                        /* Same dir/seq/ack/end/win as the last recorded ACK: count it. */
 707                        if (state->last_dir == dir
 708                            && state->last_seq == seq
 709                            && state->last_ack == ack
 710                            && state->last_end == end
 711                            && state->last_win == win)
 712                                state->retrans++;
 713                        else {
 714                                state->last_dir = dir;
 715                                state->last_seq = seq;
 716                                state->last_ack = ack;
 717                                state->last_end = end;
 718                                state->last_win = win;
 719                                state->retrans = 0;
 720                        }
 721                }
 722                res = true;
 723        } else {
 724                res = false;
 725                if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
 726                    tn->tcp_be_liberal)
 727                        res = true;
                /*
                 * The nested conditional below picks the message of the
                 * first bound that failed, testing I, II, III, IV in turn.
                 */
 728                if (!res && LOG_INVALID(net, IPPROTO_TCP))
 729                        nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 730                        "nf_ct_tcp: %s ",
 731                        before(seq, sender->td_maxend + 1) ?
 732                        in_recv_win ?
 733                        before(sack, receiver->td_end + 1) ?
 734                        after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
 735                        : "ACK is under the lower bound (possible overly delayed ACK)"
 736                        : "ACK is over the upper bound (ACKed data not seen yet)"
 737                        : "SEQ is under the lower bound (already ACKed data retransmitted)"
 738                        : "SEQ is over the upper bound (over the window of the receiver)");
 739        }
 740
 741        pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
 742                 "receiver end=%u maxend=%u maxwin=%u\n",
 743                 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
 744                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
 745
 746        return res;
 747}
 748
 749/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
 750static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
 751                                 TCPHDR_URG) + 1] =
 752{
 753        [TCPHDR_SYN]                            = 1,
 754        [TCPHDR_SYN|TCPHDR_URG]                 = 1,
 755        [TCPHDR_SYN|TCPHDR_ACK]                 = 1,
 756        [TCPHDR_RST]                            = 1,
 757        [TCPHDR_RST|TCPHDR_ACK]                 = 1,
 758        [TCPHDR_FIN|TCPHDR_ACK]                 = 1,
 759        [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]      = 1,
 760        [TCPHDR_ACK]                            = 1,
 761        [TCPHDR_ACK|TCPHDR_URG]                 = 1,
 762};
 763
 764/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
 765static int tcp_error(struct net *net, struct nf_conn *tmpl,
 766                     struct sk_buff *skb,
 767                     unsigned int dataoff,
 768                     enum ip_conntrack_info *ctinfo,
 769                     u_int8_t pf,
 770                     unsigned int hooknum)
 771{
 772        const struct tcphdr *th;
 773        struct tcphdr _tcph;
 774        unsigned int tcplen = skb->len - dataoff;
 775        u_int8_t tcpflags;
 776
 777        /* Smaller that minimal TCP header? */
 778        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
 779        if (th == NULL) {
 780                if (LOG_INVALID(net, IPPROTO_TCP))
 781                        nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 782                                "nf_ct_tcp: short packet ");
 783                return -NF_ACCEPT;
 784        }
 785
 786        /* Not whole TCP header or malformed packet */
 787        if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
 788                if (LOG_INVALID(net, IPPROTO_TCP))
 789                        nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 790                                "nf_ct_tcp: truncated/malformed packet ");
 791                return -NF_ACCEPT;
 792        }
 793
 794        /* Checksum invalid? Ignore.
 795         * We skip checking packets on the outgoing path
 796         * because the checksum is assumed to be correct.
 797         */
 798        /* FIXME: Source route IP option packets --RR */
 799        if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 800            nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
 801                if (LOG_INVALID(net, IPPROTO_TCP))
 802                        nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 803                                  "nf_ct_tcp: bad TCP checksum ");
 804                return -NF_ACCEPT;
 805        }
 806
 807        /* Check TCP flags. */
 808        tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
 809        if (!tcp_valid_flags[tcpflags]) {
 810                if (LOG_INVALID(net, IPPROTO_TCP))
 811                        nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 812                                  "nf_ct_tcp: invalid TCP flag combination ");
 813                return -NF_ACCEPT;
 814        }
 815
 816        return NF_ACCEPT;
 817}
 818
 819static unsigned int *tcp_get_timeouts(struct net *net)
 820{
 821        return tcp_pernet(net)->timeouts;
 822}
 823
 824/*
     * Track one TCP packet belonging to an existing conntrack entry.
     *
     * The candidate state is looked up in tcp_conntracks[] from the packet
     * direction, the packet type (get_conntrack_index()) and the current
     * state.  Depending on that candidate the packet is:
     *  - a SYN that tries to reopen a closed/aborted connection: the entry
     *    is killed and -NF_REPEAT is returned (NF_DROP if nf_ct_kill()
     *    failed);
     *  - ignored: its sequence data is remembered in the last_* fields, the
     *    tracked state is left untouched and NF_ACCEPT is returned;
     *  - invalid (bad state, invalid RST, out of window): -NF_ACCEPT;
     *  - otherwise accepted: the state is updated, a timeout is chosen and
     *    the entry is refreshed (or killed when the very first reply is a
     *    RST), and NF_ACCEPT is returned.
     *
     * Returns verdict for packet, or -1 for invalid.
     */
 825static int tcp_packet(struct nf_conn *ct,
 826                      const struct sk_buff *skb,
 827                      unsigned int dataoff,
 828                      enum ip_conntrack_info ctinfo,
 829                      u_int8_t pf,
 830                      unsigned int hooknum,
 831                      unsigned int *timeouts)
 832{
 833        struct net *net = nf_ct_net(ct);
 834        struct nf_tcp_net *tn = tcp_pernet(net);
 835        struct nf_conntrack_tuple *tuple;
 836        enum tcp_conntrack new_state, old_state;
 837        enum ip_conntrack_dir dir;
 838        const struct tcphdr *th;
 839        struct tcphdr _tcph;
 840        unsigned long timeout;
 841        unsigned int index;
 842
 843        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
            /* NOTE(review): presumably the header was already validated
             * (e.g. by tcp_error()) before this function runs - confirm. */
 844        BUG_ON(th == NULL);
 845
            /* ct->lock is held while ct->proto.tcp is read and updated;
             * every return path below releases it first. */
 846        spin_lock_bh(&ct->lock);
 847        old_state = ct->proto.tcp.state;
 848        dir = CTINFO2DIR(ctinfo);
 849        index = get_conntrack_index(th);
            /* Candidate transition for this direction, packet type and
             * current state. */
 850        new_state = tcp_conntracks[dir][index][old_state];
 851        tuple = &ct->tuplehash[dir].tuple;
 852
 853        switch (new_state) {
 854        case TCP_CONNTRACK_SYN_SENT:
 855                if (old_state < TCP_CONNTRACK_TIME_WAIT)
 856                        break;
 857                /* RFC 1122: "When a connection is closed actively,
 858                 * it MUST linger in TIME-WAIT state for a time 2xMSL
 859                 * (Maximum Segment Lifetime). However, it MAY accept
 860                 * a new SYN from the remote TCP to reopen the connection
 861                 * directly from TIME-WAIT state, if..."
 862                 * We ignore the conditions because we are in the
 863                 * TIME-WAIT state anyway.
 864                 *
 865                 * Handle aborted connections: we and the server
 866                 * think there is an existing connection but the client
 867                 * aborts it and starts a new one.
 868                 */
 869                if (((ct->proto.tcp.seen[dir].flags
 870                      | ct->proto.tcp.seen[!dir].flags)
 871                     & IP_CT_TCP_FLAG_CLOSE_INIT)
 872                    || (ct->proto.tcp.last_dir == dir
 873                        && ct->proto.tcp.last_index == TCP_RST_SET)) {
 874                        /* Attempt to reopen a closed/aborted connection.
 875                         * Delete this connection and look up again. */
 876                        spin_unlock_bh(&ct->lock);
 877
 878                        /* Only repeat if we can actually remove the timer.
 879                         * Destruction may already be in progress in process
 880                         * context and we must give it a chance to terminate.
 881                         */
 882                        if (nf_ct_kill(ct))
 883                                return -NF_REPEAT;
 884                        return NF_DROP;
 885                }
 886                /* Fall through */
 887        case TCP_CONNTRACK_IGNORE:
 888                /* Ignored packets:
 889                 *
 890                 * Our connection entry may be out of sync, so ignore
 891                 * packets which may signal the real connection between
 892                 * the client and the server.
 893                 *
 894                 * a) SYN in ORIGINAL
 895                 * b) SYN/ACK in REPLY
 896                 * c) ACK in reply direction after initial SYN in original.
 897                 *
 898                 * If the ignored packet is invalid, the receiver will send
 899                 * a RST we'll catch below.
 900                 */
 901                if (index == TCP_SYNACK_SET
 902                    && ct->proto.tcp.last_index == TCP_SYN_SET
 903                    && ct->proto.tcp.last_dir != dir
 904                    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
 905                        /* b) This SYN/ACK acknowledges a SYN that we earlier
 906                         * ignored as invalid. This means that the client and
 907                         * the server are both in sync, while the firewall is
 908                         * not. We get in sync from the previously annotated
 909                         * values.
 910                         */
 911                        old_state = TCP_CONNTRACK_SYN_SENT;
 912                        new_state = TCP_CONNTRACK_SYN_RECV;
 913                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
 914                                ct->proto.tcp.last_end;
 915                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
 916                                ct->proto.tcp.last_end;
 917                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
 918                                ct->proto.tcp.last_win == 0 ?
 919                                        1 : ct->proto.tcp.last_win;
 920                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
 921                                ct->proto.tcp.last_wscale;
 922                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
 923                                ct->proto.tcp.last_flags;
 924                        memset(&ct->proto.tcp.seen[dir], 0,
 925                               sizeof(struct ip_ct_tcp_state));
 926                        break;
 927                }
                    /* Remember this packet so that a later SYN/ACK or RST
                     * can be matched against it. */
 928                ct->proto.tcp.last_index = index;
 929                ct->proto.tcp.last_dir = dir;
 930                ct->proto.tcp.last_seq = ntohl(th->seq);
 931                ct->proto.tcp.last_end =
 932                    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
 933                ct->proto.tcp.last_win = ntohs(th->window);
 934
 935                /* a) This is a SYN in ORIGINAL. The client and the server
 936                 * may be in sync but we are not. In that case, we annotate
 937                 * the TCP options and let the packet go through. If it is a
 938                 * valid SYN packet, the server will reply with a SYN/ACK, and
 939                 * then we'll get in sync. Otherwise, the server ignores it. */
 940                if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
 941                        struct ip_ct_tcp_state seen = {};
 942
 943                        ct->proto.tcp.last_flags =
 944                        ct->proto.tcp.last_wscale = 0;
 945                        tcp_options(skb, dataoff, th, &seen);
 946                        if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
 947                                ct->proto.tcp.last_flags |=
 948                                        IP_CT_TCP_FLAG_WINDOW_SCALE;
 949                                ct->proto.tcp.last_wscale = seen.td_scale;
 950                        }
 951                        if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
 952                                ct->proto.tcp.last_flags |=
 953                                        IP_CT_TCP_FLAG_SACK_PERM;
 954                        }
 955                }
 956                spin_unlock_bh(&ct->lock);
 957                if (LOG_INVALID(net, IPPROTO_TCP))
 958                        nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 959                                  "nf_ct_tcp: invalid packet ignored in "
 960                                  "state %s ", tcp_conntrack_names[old_state]);
 961                return NF_ACCEPT;
 962        case TCP_CONNTRACK_MAX:
 963                /* Invalid packet */
 964                pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
 965                         dir, get_conntrack_index(th), old_state);
 966                spin_unlock_bh(&ct->lock);
 967                if (LOG_INVALID(net, IPPROTO_TCP))
 968                        nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 969                                  "nf_ct_tcp: invalid state ");
 970                return -NF_ACCEPT;
 971        case TCP_CONNTRACK_CLOSE:
                    /* A RST whose sequence number lies before the highest
                     * ACK recorded for the other direction is rejected. */
 972                if (index == TCP_RST_SET
 973                    && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
 974                    && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
 975                        /* Invalid RST  */
 976                        spin_unlock_bh(&ct->lock);
 977                        if (LOG_INVALID(net, IPPROTO_TCP))
 978                                nf_log_packet(net, pf, 0, skb, NULL, NULL,
 979                                              NULL, "nf_ct_tcp: invalid RST ");
 980                        return -NF_ACCEPT;
 981                }
 982                if (index == TCP_RST_SET
 983                    && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
 984                         && ct->proto.tcp.last_index == TCP_SYN_SET)
 985                        || (!test_bit(IPS_ASSURED_BIT, &ct->status)
 986                            && ct->proto.tcp.last_index == TCP_ACK_SET))
 987                    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
 988                        /* RST sent to invalid SYN or ACK we had let through
 989                         * at a) and c) above:
 990                         *
 991                         * a) SYN was in window then
 992                         * c) we hold a half-open connection.
 993                         *
 994                         * Delete our connection entry.
 995                         * We skip window checking, because packet might ACK
 996                         * segments we ignored. */
 997                        goto in_window;
 998                }
 999                /* Just fall through */
1000        default:
1001                /* Keep compilers happy. */
1002                break;
1003        }
1004
            /* Everything that was not handled above has to pass the window
             * check before the state transition is committed. */
1005        if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1006                           skb, dataoff, th, pf)) {
1007                spin_unlock_bh(&ct->lock);
1008                return -NF_ACCEPT;
1009        }
1010     in_window:
1011        /* From now on we have got in-window packets */
1012        ct->proto.tcp.last_index = index;
1013        ct->proto.tcp.last_dir = dir;
1014
1015        pr_debug("tcp_conntracks: ");
1016        nf_ct_dump_tuple(tuple);
1017        pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1018                 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1019                 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1020                 old_state, new_state);
1021
1022        ct->proto.tcp.state = new_state;
            /* Record which direction initiated the close; the reopen check
             * in the SYN_SENT case above tests this flag. */
1023        if (old_state != new_state
1024            && new_state == TCP_CONNTRACK_FIN_WAIT)
1025                ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1026
            /* Timeout selection: the RETRANS timeout once the retransmission
             * limit is reached, the UNACK timeout while either direction has
             * unacknowledged data, otherwise the per-state timeout.  The two
             * alternatives are only used when shorter than the per-state one. */
1027        if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1028            timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1029                timeout = timeouts[TCP_CONNTRACK_RETRANS];
1030        else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1031                 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1032                 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1033                timeout = timeouts[TCP_CONNTRACK_UNACK];
1034        else
1035                timeout = timeouts[new_state];
1036        spin_unlock_bh(&ct->lock);
1037
            /* Event caching, status-bit updates and the timer refresh are
             * done after ct->lock has been dropped. */
1038        if (new_state != old_state)
1039                nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1040
1041        if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1042                /* If only reply is a RST, we can consider ourselves not to
1043                   have an established connection: this is a fairly common
1044                   problem case, so we can delete the conntrack
1045                   immediately.  --RR */
1046                if (th->rst) {
1047                        nf_ct_kill_acct(ct, ctinfo, skb);
1048                        return NF_ACCEPT;
1049                }
1050                /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
1051                 * pickup with loose=1. Avoid large ESTABLISHED timeout.
1052                 */
1053                if (new_state == TCP_CONNTRACK_ESTABLISHED &&
1054                    timeout > timeouts[TCP_CONNTRACK_UNACK])
1055                        timeout = timeouts[TCP_CONNTRACK_UNACK];
1056        } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1057                   && (old_state == TCP_CONNTRACK_SYN_RECV
1058                       || old_state == TCP_CONNTRACK_ESTABLISHED)
1059                   && new_state == TCP_CONNTRACK_ESTABLISHED) {
1060                /* Set ASSURED if we see a valid ack in ESTABLISHED
1061                   after SYN_RECV or a valid answer for a picked up
1062                   connection. */
1063                set_bit(IPS_ASSURED_BIT, &ct->status);
1064                nf_conntrack_event_cache(IPCT_ASSURED, ct);
1065        }
1066        nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1067
1068        return NF_ACCEPT;
1069}
1070
1071/* Called when a new connection for this protocol found. */
1072static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1073                    unsigned int dataoff, unsigned int *timeouts)
1074{
1075        enum tcp_conntrack new_state;
1076        const struct tcphdr *th;
1077        struct tcphdr _tcph;
1078        struct net *net = nf_ct_net(ct);
1079        struct nf_tcp_net *tn = tcp_pernet(net);
1080        const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1081        const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1082
1083        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1084        BUG_ON(th == NULL);
1085
1086        /* Don't need lock here: this conntrack not in circulation yet */
1087        new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1088
1089        /* Invalid: delete conntrack */
1090        if (new_state >= TCP_CONNTRACK_MAX) {
1091                pr_debug("nf_ct_tcp: invalid new deleting.\n");
1092                return false;
1093        }
1094
1095        if (new_state == TCP_CONNTRACK_SYN_SENT) {
1096                memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1097                /* SYN packet */
1098                ct->proto.tcp.seen[0].td_end =
1099                        segment_seq_plus_len(ntohl(th->seq), skb->len,
1100                                             dataoff, th);
1101                ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1102                if (ct->proto.tcp.seen[0].td_maxwin == 0)
1103                        ct->proto.tcp.seen[0].td_maxwin = 1;
1104                ct->proto.tcp.seen[0].td_maxend =
1105                        ct->proto.tcp.seen[0].td_end;
1106
1107                tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1108        } else if (tn->tcp_loose == 0) {
1109                /* Don't try to pick up connections. */
1110                return false;
1111        } else {
1112                memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1113                /*
1114                 * We are in the middle of a connection,
1115                 * its history is lost for us.
1116                 * Let's try to use the data from the packet.
1117                 */
1118                ct->proto.tcp.seen[0].td_end =
1119                        segment_seq_plus_len(ntohl(th->seq), skb->len,
1120                                             dataoff, th);
1121                ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1122                if (ct->proto.tcp.seen[0].td_maxwin == 0)
1123                        ct->proto.tcp.seen[0].td_maxwin = 1;
1124                ct->proto.tcp.seen[0].td_maxend =
1125                        ct->proto.tcp.seen[0].td_end +
1126                        ct->proto.tcp.seen[0].td_maxwin;
1127
1128                /* We assume SACK and liberal window checking to handle
1129                 * window scaling */
1130                ct->proto.tcp.seen[0].flags =
1131                ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1132                                              IP_CT_TCP_FLAG_BE_LIBERAL;
1133        }
1134
1135        /* tcp_packet will set them */
1136        ct->proto.tcp.last_index = TCP_NONE_SET;
1137
1138        pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1139                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1140                 sender->td_end, sender->td_maxend, sender->td_maxwin,
1141                 sender->td_scale,
1142                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1143                 receiver->td_scale);
1144        return true;
1145}
1146
1147#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1148
1149#include <linux/netfilter/nfnetlink.h>
1150#include <linux/netfilter/nfnetlink_conntrack.h>
1151
1152static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1153                         struct nf_conn *ct)
1154{
1155        struct nlattr *nest_parms;
1156        struct nf_ct_tcp_flags tmp = {};
1157
1158        spin_lock_bh(&ct->lock);
1159        nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1160        if (!nest_parms)
1161                goto nla_put_failure;
1162
1163        if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1164            nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1165                       ct->proto.tcp.seen[0].td_scale) ||
1166            nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1167                       ct->proto.tcp.seen[1].td_scale))
1168                goto nla_put_failure;
1169
1170        tmp.flags = ct->proto.tcp.seen[0].flags;
1171        if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1172                    sizeof(struct nf_ct_tcp_flags), &tmp))
1173                goto nla_put_failure;
1174
1175        tmp.flags = ct->proto.tcp.seen[1].flags;
1176        if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1177                    sizeof(struct nf_ct_tcp_flags), &tmp))
1178                goto nla_put_failure;
1179        spin_unlock_bh(&ct->lock);
1180
1181        nla_nest_end(skb, nest_parms);
1182
1183        return 0;
1184
1185nla_put_failure:
1186        spin_unlock_bh(&ct->lock);
1187        return -1;
1188}
1189
1190static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1191        [CTA_PROTOINFO_TCP_STATE]           = { .type = NLA_U8 },
1192        [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1193        [CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
1194        [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
1195        [CTA_PROTOINFO_TCP_FLAGS_REPLY]     = { .len =  sizeof(struct nf_ct_tcp_flags) },
1196};
1197
1198static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1199{
1200        struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1201        struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1202        int err;
1203
1204        /* updates could not contain anything about the private
1205         * protocol info, in that case skip the parsing */
1206        if (!pattr)
1207                return 0;
1208
1209        err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
1210        if (err < 0)
1211                return err;
1212
1213        if (tb[CTA_PROTOINFO_TCP_STATE] &&
1214            nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1215                return -EINVAL;
1216
1217        spin_lock_bh(&ct->lock);
1218        if (tb[CTA_PROTOINFO_TCP_STATE])
1219                ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1220
1221        if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1222                struct nf_ct_tcp_flags *attr =
1223                        nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1224                ct->proto.tcp.seen[0].flags &= ~attr->mask;
1225                ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1226        }
1227
1228        if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1229                struct nf_ct_tcp_flags *attr =
1230                        nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1231                ct->proto.tcp.seen[1].flags &= ~attr->mask;
1232                ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1233        }
1234
1235        if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1236            tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1237            ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1238            ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1239                ct->proto.tcp.seen[0].td_scale =
1240                        nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1241                ct->proto.tcp.seen[1].td_scale =
1242                        nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1243        }
1244        spin_unlock_bh(&ct->lock);
1245
1246        return 0;
1247}
1248
1249static int tcp_nlattr_size(void)
1250{
1251        return nla_total_size(0)           /* CTA_PROTOINFO_TCP */
1252                + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1253}
1254
1255static int tcp_nlattr_tuple_size(void)
1256{
1257        return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1258}
1259#endif
1260
1261#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1262
1263#include <linux/netfilter/nfnetlink.h>
1264#include <linux/netfilter/nfnetlink_cttimeout.h>
1265
1266static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1267                                     struct net *net, void *data)
1268{
1269        unsigned int *timeouts = data;
1270        struct nf_tcp_net *tn = tcp_pernet(net);
1271        int i;
1272
1273        /* set default TCP timeouts. */
1274        for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1275                timeouts[i] = tn->timeouts[i];
1276
1277        if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1278                timeouts[TCP_CONNTRACK_SYN_SENT] =
1279                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1280        }
1281        if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1282                timeouts[TCP_CONNTRACK_SYN_RECV] =
1283                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1284        }
1285        if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1286                timeouts[TCP_CONNTRACK_ESTABLISHED] =
1287                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1288        }
1289        if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1290                timeouts[TCP_CONNTRACK_FIN_WAIT] =
1291                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1292        }
1293        if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1294                timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1295                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1296        }
1297        if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1298                timeouts[TCP_CONNTRACK_LAST_ACK] =
1299                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1300        }
1301        if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1302                timeouts[TCP_CONNTRACK_TIME_WAIT] =
1303                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1304        }
1305        if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1306                timeouts[TCP_CONNTRACK_CLOSE] =
1307                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1308        }
1309        if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1310                timeouts[TCP_CONNTRACK_SYN_SENT2] =
1311                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1312        }
1313        if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1314                timeouts[TCP_CONNTRACK_RETRANS] =
1315                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1316        }
1317        if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1318                timeouts[TCP_CONNTRACK_UNACK] =
1319                        ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1320        }
1321        return 0;
1322}
1323
1324static int
1325tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1326{
1327        const unsigned int *timeouts = data;
1328
1329        if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1330                        htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1331            nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1332                         htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1333            nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1334                         htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1335            nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1336                         htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1337            nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1338                         htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1339            nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1340                         htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1341            nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1342                         htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1343            nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1344                         htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1345            nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1346                         htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1347            nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1348                         htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1349            nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1350                         htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1351                goto nla_put_failure;
1352        return 0;
1353
1354nla_put_failure:
1355        return -ENOSPC;
1356}
1357
1358static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1359        [CTA_TIMEOUT_TCP_SYN_SENT]      = { .type = NLA_U32 },
1360        [CTA_TIMEOUT_TCP_SYN_RECV]      = { .type = NLA_U32 },
1361        [CTA_TIMEOUT_TCP_ESTABLISHED]   = { .type = NLA_U32 },
1362        [CTA_TIMEOUT_TCP_FIN_WAIT]      = { .type = NLA_U32 },
1363        [CTA_TIMEOUT_TCP_CLOSE_WAIT]    = { .type = NLA_U32 },
1364        [CTA_TIMEOUT_TCP_LAST_ACK]      = { .type = NLA_U32 },
1365        [CTA_TIMEOUT_TCP_TIME_WAIT]     = { .type = NLA_U32 },
1366        [CTA_TIMEOUT_TCP_CLOSE]         = { .type = NLA_U32 },
1367        [CTA_TIMEOUT_TCP_SYN_SENT2]     = { .type = NLA_U32 },
1368        [CTA_TIMEOUT_TCP_RETRANS]       = { .type = NLA_U32 },
1369        [CTA_TIMEOUT_TCP_UNACK]         = { .type = NLA_U32 },
1370};
1371#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1372
1373#ifdef CONFIG_SYSCTL
1374static struct ctl_table tcp_sysctl_table[] = {
1375        {
1376                .procname       = "nf_conntrack_tcp_timeout_syn_sent",
1377                .maxlen         = sizeof(unsigned int),
1378                .mode           = 0644,
1379                .proc_handler   = proc_dointvec_jiffies,
1380        },
1381        {
1382                .procname       = "nf_conntrack_tcp_timeout_syn_recv",
1383                .maxlen         = sizeof(unsigned int),
1384                .mode           = 0644,
1385                .proc_handler   = proc_dointvec_jiffies,
1386        },
1387        {
1388                .procname       = "nf_conntrack_tcp_timeout_established",
1389                .maxlen         = sizeof(unsigned int),
1390                .mode           = 0644,
1391                .proc_handler   = proc_dointvec_jiffies,
1392        },
1393        {
1394                .procname       = "nf_conntrack_tcp_timeout_fin_wait",
1395                .maxlen         = sizeof(unsigned int),
1396                .mode           = 0644,
1397                .proc_handler   = proc_dointvec_jiffies,
1398        },
1399        {
1400                .procname       = "nf_conntrack_tcp_timeout_close_wait",
1401                .maxlen         = sizeof(unsigned int),
1402                .mode           = 0644,
1403                .proc_handler   = proc_dointvec_jiffies,
1404        },
1405        {
1406                .procname       = "nf_conntrack_tcp_timeout_last_ack",
1407                .maxlen         = sizeof(unsigned int),
1408                .mode           = 0644,
1409                .proc_handler   = proc_dointvec_jiffies,
1410        },
1411        {
1412                .procname       = "nf_conntrack_tcp_timeout_time_wait",
1413                .maxlen         = sizeof(unsigned int),
1414                .mode           = 0644,
1415                .proc_handler   = proc_dointvec_jiffies,
1416        },
1417        {
1418                .procname       = "nf_conntrack_tcp_timeout_close",
1419                .maxlen         = sizeof(unsigned int),
1420                .mode           = 0644,
1421                .proc_handler   = proc_dointvec_jiffies,
1422        },
1423        {
1424                .procname       = "nf_conntrack_tcp_timeout_max_retrans",
1425                .maxlen         = sizeof(unsigned int),
1426                .mode           = 0644,
1427                .proc_handler   = proc_dointvec_jiffies,
1428        },
1429        {
1430                .procname       = "nf_conntrack_tcp_timeout_unacknowledged",
1431                .maxlen         = sizeof(unsigned int),
1432                .mode           = 0644,
1433                .proc_handler   = proc_dointvec_jiffies,
1434        },
1435        {
1436                .procname       = "nf_conntrack_tcp_loose",
1437                .maxlen         = sizeof(unsigned int),
1438                .mode           = 0644,
1439                .proc_handler   = proc_dointvec,
1440        },
1441        {
1442                .procname       = "nf_conntrack_tcp_be_liberal",
1443                .maxlen         = sizeof(unsigned int),
1444                .mode           = 0644,
1445                .proc_handler   = proc_dointvec,
1446        },
1447        {
1448                .procname       = "nf_conntrack_tcp_max_retrans",
1449                .maxlen         = sizeof(unsigned int),
1450                .mode           = 0644,
1451                .proc_handler   = proc_dointvec,
1452        },
1453        { }
1454};
1455
1456#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1457static struct ctl_table tcp_compat_sysctl_table[] = {
1458        {
1459                .procname       = "ip_conntrack_tcp_timeout_syn_sent",
1460                .maxlen         = sizeof(unsigned int),
1461                .mode           = 0644,
1462                .proc_handler   = proc_dointvec_jiffies,
1463        },
1464        {
1465                .procname       = "ip_conntrack_tcp_timeout_syn_sent2",
1466                .maxlen         = sizeof(unsigned int),
1467                .mode           = 0644,
1468                .proc_handler   = proc_dointvec_jiffies,
1469        },
1470        {
1471                .procname       = "ip_conntrack_tcp_timeout_syn_recv",
1472                .maxlen         = sizeof(unsigned int),
1473                .mode           = 0644,
1474                .proc_handler   = proc_dointvec_jiffies,
1475        },
1476        {
1477                .procname       = "ip_conntrack_tcp_timeout_established",
1478                .maxlen         = sizeof(unsigned int),
1479                .mode           = 0644,
1480                .proc_handler   = proc_dointvec_jiffies,
1481        },
1482        {
1483                .procname       = "ip_conntrack_tcp_timeout_fin_wait",
1484                .maxlen         = sizeof(unsigned int),
1485                .mode           = 0644,
1486                .proc_handler   = proc_dointvec_jiffies,
1487        },
1488        {
1489                .procname       = "ip_conntrack_tcp_timeout_close_wait",
1490                .maxlen         = sizeof(unsigned int),
1491                .mode           = 0644,
1492                .proc_handler   = proc_dointvec_jiffies,
1493        },
1494        {
1495                .procname       = "ip_conntrack_tcp_timeout_last_ack",
1496                .maxlen         = sizeof(unsigned int),
1497                .mode           = 0644,
1498                .proc_handler   = proc_dointvec_jiffies,
1499        },
1500        {
1501                .procname       = "ip_conntrack_tcp_timeout_time_wait",
1502                .maxlen         = sizeof(unsigned int),
1503                .mode           = 0644,
1504                .proc_handler   = proc_dointvec_jiffies,
1505        },
1506        {
1507                .procname       = "ip_conntrack_tcp_timeout_close",
1508                .maxlen         = sizeof(unsigned int),
1509                .mode           = 0644,
1510                .proc_handler   = proc_dointvec_jiffies,
1511        },
1512        {
1513                .procname       = "ip_conntrack_tcp_timeout_max_retrans",
1514                .maxlen         = sizeof(unsigned int),
1515                .mode           = 0644,
1516                .proc_handler   = proc_dointvec_jiffies,
1517        },
1518        {
1519                .procname       = "ip_conntrack_tcp_loose",
1520                .maxlen         = sizeof(unsigned int),
1521                .mode           = 0644,
1522                .proc_handler   = proc_dointvec,
1523        },
1524        {
1525                .procname       = "ip_conntrack_tcp_be_liberal",
1526                .maxlen         = sizeof(unsigned int),
1527                .mode           = 0644,
1528                .proc_handler   = proc_dointvec,
1529        },
1530        {
1531                .procname       = "ip_conntrack_tcp_max_retrans",
1532                .maxlen         = sizeof(unsigned int),
1533                .mode           = 0644,
1534                .proc_handler   = proc_dointvec,
1535        },
1536        { }
1537};
1538#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1539#endif /* CONFIG_SYSCTL */
1540
/*
 * Duplicate tcp_sysctl_table into pn->ctl_table and point every entry's
 * .data at the matching field of @tn.
 *
 * Does nothing if pn->ctl_table is already set, or if CONFIG_SYSCTL is off.
 * The slot numbers below follow the entry order of tcp_sysctl_table.
 *
 * Returns 0 on success, -ENOMEM if the copy cannot be allocated.
 */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
				    struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *table;

	if (pn->ctl_table)
		return 0;

	table = kmemdup(tcp_sysctl_table, sizeof(tcp_sysctl_table),
			GFP_KERNEL);
	pn->ctl_table = table;
	if (table == NULL)
		return -ENOMEM;

	/* per-state timeouts */
	table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
	table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK];

	/* integer knobs */
	table[10].data = &tn->tcp_loose;
	table[11].data = &tn->tcp_be_liberal;
	table[12].data = &tn->tcp_max_retrans;
#endif
	return 0;
}
1570
/*
 * Duplicate tcp_compat_sysctl_table into pn->ctl_compat_table and point every
 * entry's .data at the matching field of @tn.
 *
 * Compiles to a plain "return 0" unless both CONFIG_SYSCTL and
 * CONFIG_NF_CONNTRACK_PROC_COMPAT are defined.  The slot numbers below follow
 * the entry order of tcp_compat_sysctl_table.
 *
 * Returns 0 on success, -ENOMEM if the copy cannot be allocated.
 */
static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
					   struct nf_tcp_net *tn)
{
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
	struct ctl_table *compat;

	compat = kmemdup(tcp_compat_sysctl_table,
			 sizeof(tcp_compat_sysctl_table), GFP_KERNEL);
	pn->ctl_compat_table = compat;
	if (compat == NULL)
		return -ENOMEM;

	/* per-state timeouts */
	compat[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	compat[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2];
	compat[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	compat[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	compat[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	compat[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	compat[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	compat[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	compat[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	compat[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];

	/* integer knobs */
	compat[10].data = &tn->tcp_loose;
	compat[11].data = &tn->tcp_be_liberal;
	compat[12].data = &tn->tcp_max_retrans;
#endif
	return 0;
}
1599
1600static int tcp_init_net(struct net *net, u_int16_t proto)
1601{
1602        int ret;
1603        struct nf_tcp_net *tn = tcp_pernet(net);
1604        struct nf_proto_net *pn = &tn->pn;
1605
1606        if (!pn->users) {
1607                int i;
1608
1609                for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1610                        tn->timeouts[i] = tcp_timeouts[i];
1611
1612                tn->tcp_loose = nf_ct_tcp_loose;
1613                tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1614                tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1615        }
1616
1617        if (proto == AF_INET) {
1618                ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
1619                if (ret < 0)
1620                        return ret;
1621
1622                ret = tcp_kmemdup_sysctl_table(pn, tn);
1623                if (ret < 0)
1624                        nf_ct_kfree_compat_sysctl_table(pn);
1625        } else
1626                ret = tcp_kmemdup_sysctl_table(pn, tn);
1627
1628        return ret;
1629}
1630
1631static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1632{
1633        return &net->ct.nf_ct_proto.tcp.pn;
1634}
1635
1636struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1637{
1638        .l3proto                = PF_INET,
1639        .l4proto                = IPPROTO_TCP,
1640        .name                   = "tcp",
1641        .pkt_to_tuple           = tcp_pkt_to_tuple,
1642        .invert_tuple           = tcp_invert_tuple,
1643        .print_tuple            = tcp_print_tuple,
1644        .print_conntrack        = tcp_print_conntrack,
1645        .packet                 = tcp_packet,
1646        .get_timeouts           = tcp_get_timeouts,
1647        .new                    = tcp_new,
1648        .error                  = tcp_error,
1649#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1650        .to_nlattr              = tcp_to_nlattr,
1651        .nlattr_size            = tcp_nlattr_size,
1652        .from_nlattr            = nlattr_to_tcp,
1653        .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1654        .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1655        .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1656        .nla_policy             = nf_ct_port_nla_policy,
1657#endif
1658#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1659        .ctnl_timeout           = {
1660                .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1661                .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1662                .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1663                .obj_size       = sizeof(unsigned int) *
1664                                        TCP_CONNTRACK_TIMEOUT_MAX,
1665                .nla_policy     = tcp_timeout_nla_policy,
1666        },
1667#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1668        .init_net               = tcp_init_net,
1669        .get_net_proto          = tcp_get_net_proto,
1670};
1671EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1672
1673struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1674{
1675        .l3proto                = PF_INET6,
1676        .l4proto                = IPPROTO_TCP,
1677        .name                   = "tcp",
1678        .pkt_to_tuple           = tcp_pkt_to_tuple,
1679        .invert_tuple           = tcp_invert_tuple,
1680        .print_tuple            = tcp_print_tuple,
1681        .print_conntrack        = tcp_print_conntrack,
1682        .packet                 = tcp_packet,
1683        .get_timeouts           = tcp_get_timeouts,
1684        .new                    = tcp_new,
1685        .error                  = tcp_error,
1686#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1687        .to_nlattr              = tcp_to_nlattr,
1688        .nlattr_size            = tcp_nlattr_size,
1689        .from_nlattr            = nlattr_to_tcp,
1690        .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1691        .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1692        .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1693        .nla_policy             = nf_ct_port_nla_policy,
1694#endif
1695#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1696        .ctnl_timeout           = {
1697                .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1698                .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1699                .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1700                .obj_size       = sizeof(unsigned int) *
1701                                        TCP_CONNTRACK_TIMEOUT_MAX,
1702                .nla_policy     = tcp_timeout_nla_policy,
1703        },
1704#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1705        .init_net               = tcp_init_net,
1706        .get_net_proto          = tcp_get_net_proto,
1707};
1708EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
1709