qemu/slirp/tcp_input.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
   3 *      The Regents of the University of California.  All rights reserved.
   4 *
   5 * Redistribution and use in source and binary forms, with or without
   6 * modification, are permitted provided that the following conditions
   7 * are met:
   8 * 1. Redistributions of source code must retain the above copyright
   9 *    notice, this list of conditions and the following disclaimer.
  10 * 2. Redistributions in binary form must reproduce the above copyright
  11 *    notice, this list of conditions and the following disclaimer in the
  12 *    documentation and/or other materials provided with the distribution.
  13 * 3. Neither the name of the University nor the names of its contributors
  14 *    may be used to endorse or promote products derived from this software
  15 *    without specific prior written permission.
  16 *
  17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27 * SUCH DAMAGE.
  28 *
  29 *      @(#)tcp_input.c 8.5 (Berkeley) 4/10/94
  30 * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp
  31 */
  32
  33/*
  34 * Changes and additions relating to SLiRP
  35 * Copyright (c) 1995 Danny Gasparovski.
  36 *
  37 * Please read the file COPYRIGHT for the
  38 * terms and conditions of the copyright.
  39 */
  40
  41#include <slirp.h>
  42#include "ip_icmp.h"
  43
/*
 * Retransmit threshold.
 * NOTE(review): presumably the number of duplicate ACKs that triggers
 * fast retransmit, as in BSD; its use lies outside this part of the
 * file -- confirm.
 */
#define TCPREXMTTHRESH 3

/*
 * PAWS idle limit: 24 * 24 * 60 * 60 multiplied by PR_SLOWHZ.
 * NOTE(review): reads as 24 days expressed in slow-timer ticks, assuming
 * PR_SLOWHZ is ticks per second -- confirm against its definition.
 */
#define TCP_PAWS_IDLE   (24 * 24 * 60 * 60 * PR_SLOWHZ)

/*
 * Modulo comparisons of timestamps: the difference is cast to int so the
 * sign of the result decides the ordering even when the values wrap.
 */
#define TSTMP_LT(a,b)   ((int)((a)-(b)) < 0)
#define TSTMP_GEQ(a,b)  ((int)((a)-(b)) >= 0)
  51
  52/*
  53 * Insert segment ti into reassembly queue of tcp with
  54 * control block tp.  Return TH_FIN if reassembly now includes
  55 * a segment with FIN.  The macro form does the common case inline
  56 * (segment is the next to be received on an established connection,
  57 * and the queue is empty), avoiding linkage into and removal
  58 * from the queue and repetition of various conversions.
  59 * Set DELACK for segments received in order, but ack immediately
  60 * when segments are out of order (so fast retransmit can work).
  61 */
  62#ifdef TCP_ACK_HACK
  63#define TCP_REASS(tp, ti, m, so, flags) {\
  64       if ((ti)->ti_seq == (tp)->rcv_nxt && \
  65           tcpfrag_list_empty(tp) && \
  66           (tp)->t_state == TCPS_ESTABLISHED) {\
  67               if (ti->ti_flags & TH_PUSH) \
  68                       tp->t_flags |= TF_ACKNOW; \
  69               else \
  70                       tp->t_flags |= TF_DELACK; \
  71               (tp)->rcv_nxt += (ti)->ti_len; \
  72               flags = (ti)->ti_flags & TH_FIN; \
  73               if (so->so_emu) { \
  74                       if (tcp_emu((so),(m))) sbappend((so), (m)); \
  75               } else \
  76                       sbappend((so), (m)); \
  77        } else {\
  78               (flags) = tcp_reass((tp), (ti), (m)); \
  79               tp->t_flags |= TF_ACKNOW; \
  80       } \
  81}
  82#else
  83#define TCP_REASS(tp, ti, m, so, flags) { \
  84        if ((ti)->ti_seq == (tp)->rcv_nxt && \
  85        tcpfrag_list_empty(tp) && \
  86            (tp)->t_state == TCPS_ESTABLISHED) { \
  87                tp->t_flags |= TF_DELACK; \
  88                (tp)->rcv_nxt += (ti)->ti_len; \
  89                flags = (ti)->ti_flags & TH_FIN; \
  90                if (so->so_emu) { \
  91                        if (tcp_emu((so),(m))) sbappend(so, (m)); \
  92                } else \
  93                        sbappend((so), (m)); \
  94        } else { \
  95                (flags) = tcp_reass((tp), (ti), (m)); \
  96                tp->t_flags |= TF_ACKNOW; \
  97        } \
  98}
  99#endif
/*
 * Forward declarations of file-local helpers that tcp_input() calls
 * before their definitions (the definitions lie further down the file).
 */
static void tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt,
                          struct tcpiphdr *ti);
static void tcp_xmit_timer(register struct tcpcb *tp, int rtt);
 103
 104static int
 105tcp_reass(register struct tcpcb *tp, register struct tcpiphdr *ti,
 106          struct mbuf *m)
 107{
 108        register struct tcpiphdr *q;
 109        struct socket *so = tp->t_socket;
 110        int flags;
 111
 112        /*
 113         * Call with ti==NULL after become established to
 114         * force pre-ESTABLISHED data up to user socket.
 115         */
 116        if (ti == NULL)
 117                goto present;
 118
 119        /*
 120         * Find a segment which begins after this one does.
 121         */
 122        for (q = tcpfrag_list_first(tp); !tcpfrag_list_end(q, tp);
 123            q = tcpiphdr_next(q))
 124                if (SEQ_GT(q->ti_seq, ti->ti_seq))
 125                        break;
 126
 127        /*
 128         * If there is a preceding segment, it may provide some of
 129         * our data already.  If so, drop the data from the incoming
 130         * segment.  If it provides all of our data, drop us.
 131         */
 132        if (!tcpfrag_list_end(tcpiphdr_prev(q), tp)) {
 133                register int i;
 134                q = tcpiphdr_prev(q);
 135                /* conversion to int (in i) handles seq wraparound */
 136                i = q->ti_seq + q->ti_len - ti->ti_seq;
 137                if (i > 0) {
 138                        if (i >= ti->ti_len) {
 139                                m_free(m);
 140                                /*
 141                                 * Try to present any queued data
 142                                 * at the left window edge to the user.
 143                                 * This is needed after the 3-WHS
 144                                 * completes.
 145                                 */
 146                                goto present;   /* ??? */
 147                        }
 148                        m_adj(m, i);
 149                        ti->ti_len -= i;
 150                        ti->ti_seq += i;
 151                }
 152                q = tcpiphdr_next(q);
 153        }
 154        ti->ti_mbuf = m;
 155
 156        /*
 157         * While we overlap succeeding segments trim them or,
 158         * if they are completely covered, dequeue them.
 159         */
 160        while (!tcpfrag_list_end(q, tp)) {
 161                register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
 162                if (i <= 0)
 163                        break;
 164                if (i < q->ti_len) {
 165                        q->ti_seq += i;
 166                        q->ti_len -= i;
 167                        m_adj(q->ti_mbuf, i);
 168                        break;
 169                }
 170                q = tcpiphdr_next(q);
 171                m = tcpiphdr_prev(q)->ti_mbuf;
 172                remque(tcpiphdr2qlink(tcpiphdr_prev(q)));
 173                m_free(m);
 174        }
 175
 176        /*
 177         * Stick new segment in its place.
 178         */
 179        insque(tcpiphdr2qlink(ti), tcpiphdr2qlink(tcpiphdr_prev(q)));
 180
 181present:
 182        /*
 183         * Present data to user, advancing rcv_nxt through
 184         * completed sequence space.
 185         */
 186        if (!TCPS_HAVEESTABLISHED(tp->t_state))
 187                return (0);
 188        ti = tcpfrag_list_first(tp);
 189        if (tcpfrag_list_end(ti, tp) || ti->ti_seq != tp->rcv_nxt)
 190                return (0);
 191        if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)
 192                return (0);
 193        do {
 194                tp->rcv_nxt += ti->ti_len;
 195                flags = ti->ti_flags & TH_FIN;
 196                remque(tcpiphdr2qlink(ti));
 197                m = ti->ti_mbuf;
 198                ti = tcpiphdr_next(ti);
 199                if (so->so_state & SS_FCANTSENDMORE)
 200                        m_free(m);
 201                else {
 202                        if (so->so_emu) {
 203                                if (tcp_emu(so,m)) sbappend(so, m);
 204                        } else
 205                                sbappend(so, m);
 206                }
 207        } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);
 208        return (flags);
 209}
 210
 211/*
 212 * TCP input routine, follows pages 65-76 of the
 213 * protocol specification dated September, 1981 very closely.
 214 */
 215void
 216tcp_input(struct mbuf *m, int iphlen, struct socket *inso)
 217{
 218        struct ip save_ip, *ip;
 219        register struct tcpiphdr *ti;
 220        caddr_t optp = NULL;
 221        int optlen = 0;
 222        int len, tlen, off;
 223        register struct tcpcb *tp = NULL;
 224        register int tiflags;
 225        struct socket *so = NULL;
 226        int todrop, acked, ourfinisacked, needoutput = 0;
 227        int iss = 0;
 228        u_long tiwin;
 229        int ret;
 230    struct ex_list *ex_ptr;
 231    Slirp *slirp;
 232
 233        DEBUG_CALL("tcp_input");
 234        DEBUG_ARGS((dfd, " m = %8lx  iphlen = %2d  inso = %lx\n",
 235                    (long )m, iphlen, (long )inso ));
 236
 237        /*
 238         * If called with m == 0, then we're continuing the connect
 239         */
 240        if (m == NULL) {
 241                so = inso;
 242                slirp = so->slirp;
 243
 244                /* Re-set a few variables */
 245                tp = sototcpcb(so);
 246                m = so->so_m;
 247                so->so_m = NULL;
 248                ti = so->so_ti;
 249                tiwin = ti->ti_win;
 250                tiflags = ti->ti_flags;
 251
 252                goto cont_conn;
 253        }
 254        slirp = m->slirp;
 255
 256        /*
 257         * Get IP and TCP header together in first mbuf.
 258         * Note: IP leaves IP header in first mbuf.
 259         */
 260        ti = mtod(m, struct tcpiphdr *);
 261        if (iphlen > sizeof(struct ip )) {
 262          ip_stripoptions(m, (struct mbuf *)0);
 263          iphlen=sizeof(struct ip );
 264        }
 265        /* XXX Check if too short */
 266
 267
 268        /*
 269         * Save a copy of the IP header in case we want restore it
 270         * for sending an ICMP error message in response.
 271         */
 272        ip=mtod(m, struct ip *);
 273        save_ip = *ip;
 274        save_ip.ip_len+= iphlen;
 275
 276        /*
 277         * Checksum extended TCP header and data.
 278         */
 279        tlen = ((struct ip *)ti)->ip_len;
 280        tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL;
 281        memset(&ti->ti_i.ih_mbuf, 0 , sizeof(struct mbuf_ptr));
 282        ti->ti_x1 = 0;
 283        ti->ti_len = htons((uint16_t)tlen);
 284        len = sizeof(struct ip ) + tlen;
 285        if(cksum(m, len)) {
 286          goto drop;
 287        }
 288
 289        /*
 290         * Check that TCP offset makes sense,
 291         * pull out TCP options and adjust length.              XXX
 292         */
 293        off = ti->ti_off << 2;
 294        if (off < sizeof (struct tcphdr) || off > tlen) {
 295          goto drop;
 296        }
 297        tlen -= off;
 298        ti->ti_len = tlen;
 299        if (off > sizeof (struct tcphdr)) {
 300          optlen = off - sizeof (struct tcphdr);
 301          optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
 302        }
 303        tiflags = ti->ti_flags;
 304
 305        /*
 306         * Convert TCP protocol specific fields to host format.
 307         */
 308        NTOHL(ti->ti_seq);
 309        NTOHL(ti->ti_ack);
 310        NTOHS(ti->ti_win);
 311        NTOHS(ti->ti_urp);
 312
 313        /*
 314         * Drop TCP, IP headers and TCP options.
 315         */
 316        m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
 317        m->m_len  -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
 318
 319        /*
 320         * Locate pcb for segment.
 321         */
 322findso:
 323        so = slirp->tcp_last_so;
 324        if (so->so_fport != ti->ti_dport ||
 325            so->so_lport != ti->ti_sport ||
 326            so->so_laddr.s_addr != ti->ti_src.s_addr ||
 327            so->so_faddr.s_addr != ti->ti_dst.s_addr) {
 328                so = solookup(&slirp->tcb, ti->ti_src, ti->ti_sport,
 329                               ti->ti_dst, ti->ti_dport);
 330                if (so)
 331                        slirp->tcp_last_so = so;
 332        }
 333
 334        /*
 335         * If the state is CLOSED (i.e., TCB does not exist) then
 336         * all data in the incoming segment is discarded.
 337         * If the TCB exists but is in CLOSED state, it is embryonic,
 338         * but should either do a listen or a connect soon.
 339         *
 340         * state == CLOSED means we've done socreate() but haven't
 341         * attached it to a protocol yet...
 342         *
 343         * XXX If a TCB does not exist, and the TH_SYN flag is
 344         * the only flag set, then create a session, mark it
 345         * as if it was LISTENING, and continue...
 346         */
 347        if (so == NULL) {
 348          if (slirp->restricted) {
 349            /* Any hostfwds will have an existing socket, so we only get here
 350             * for non-hostfwd connections. These should be dropped, unless it
 351             * happens to be a guestfwd.
 352             */
 353            for (ex_ptr = slirp->exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) {
 354                if (ex_ptr->ex_fport == ti->ti_dport &&
 355                    ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) {
 356                    break;
 357                }
 358            }
 359            if (!ex_ptr) {
 360                goto dropwithreset;
 361            }
 362          }
 363
 364          if ((tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) != TH_SYN)
 365            goto dropwithreset;
 366
 367          if ((so = socreate(slirp)) == NULL)
 368            goto dropwithreset;
 369          if (tcp_attach(so) < 0) {
 370            free(so); /* Not sofree (if it failed, it's not insqued) */
 371            goto dropwithreset;
 372          }
 373
 374          sbreserve(&so->so_snd, TCP_SNDSPACE);
 375          sbreserve(&so->so_rcv, TCP_RCVSPACE);
 376
 377          so->so_laddr = ti->ti_src;
 378          so->so_lport = ti->ti_sport;
 379          so->so_faddr = ti->ti_dst;
 380          so->so_fport = ti->ti_dport;
 381
 382          if ((so->so_iptos = tcp_tos(so)) == 0)
 383            so->so_iptos = ((struct ip *)ti)->ip_tos;
 384
 385          tp = sototcpcb(so);
 386          tp->t_state = TCPS_LISTEN;
 387        }
 388
 389        /*
  390         * If this is a still-connecting socket, this is probably
 391         * a retransmit of the SYN.  Whether it's a retransmit SYN
 392         * or something else, we nuke it.
 393         */
 394        if (so->so_state & SS_ISFCONNECTING)
 395                goto drop;
 396
 397        tp = sototcpcb(so);
 398
 399        /* XXX Should never fail */
 400        if (tp == NULL)
 401                goto dropwithreset;
 402        if (tp->t_state == TCPS_CLOSED)
 403                goto drop;
 404
 405        tiwin = ti->ti_win;
 406
 407        /*
 408         * Segment received on connection.
 409         * Reset idle time and keep-alive timer.
 410         */
 411        tp->t_idle = 0;
 412        if (SO_OPTIONS)
 413           tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL;
 414        else
 415           tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE;
 416
 417        /*
 418         * Process options if not in LISTEN state,
 419         * else do it below (after getting remote address).
 420         */
 421        if (optp && tp->t_state != TCPS_LISTEN)
 422                tcp_dooptions(tp, (u_char *)optp, optlen, ti);
 423
 424        /*
 425         * Header prediction: check for the two common cases
 426         * of a uni-directional data xfer.  If the packet has
 427         * no control flags, is in-sequence, the window didn't
 428         * change and we're not retransmitting, it's a
 429         * candidate.  If the length is zero and the ack moved
 430         * forward, we're the sender side of the xfer.  Just
 431         * free the data acked & wake any higher level process
 432         * that was blocked waiting for space.  If the length
 433         * is non-zero and the ack didn't move, we're the
 434         * receiver side.  If we're getting packets in-order
 435         * (the reassembly queue is empty), add the data to
 436         * the socket buffer and note that we need a delayed ack.
 437         *
 438         * XXX Some of these tests are not needed
 439         * eg: the tiwin == tp->snd_wnd prevents many more
 440         * predictions.. with no *real* advantage..
 441         */
 442        if (tp->t_state == TCPS_ESTABLISHED &&
 443            (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
 444            ti->ti_seq == tp->rcv_nxt &&
 445            tiwin && tiwin == tp->snd_wnd &&
 446            tp->snd_nxt == tp->snd_max) {
 447                if (ti->ti_len == 0) {
 448                        if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
 449                            SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
 450                            tp->snd_cwnd >= tp->snd_wnd) {
 451                                /*
 452                                 * this is a pure ack for outstanding data.
 453                                 */
 454                                if (tp->t_rtt &&
 455                                    SEQ_GT(ti->ti_ack, tp->t_rtseq))
 456                                        tcp_xmit_timer(tp, tp->t_rtt);
 457                                acked = ti->ti_ack - tp->snd_una;
 458                                sbdrop(&so->so_snd, acked);
 459                                tp->snd_una = ti->ti_ack;
 460                                m_free(m);
 461
 462                                /*
 463                                 * If all outstanding data are acked, stop
 464                                 * retransmit timer, otherwise restart timer
 465                                 * using current (possibly backed-off) value.
 466                                 * If process is waiting for space,
 467                                 * wakeup/selwakeup/signal.  If data
 468                                 * are ready to send, let tcp_output
 469                                 * decide between more output or persist.
 470                                 */
 471                                if (tp->snd_una == tp->snd_max)
 472                                        tp->t_timer[TCPT_REXMT] = 0;
 473                                else if (tp->t_timer[TCPT_PERSIST] == 0)
 474                                        tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
 475
 476                                /*
 477                                 * This is called because sowwakeup might have
  478                                 * put data into so_snd.  Since we don't do sowwakeup,
 479                                 * we don't need this.. XXX???
 480                                 */
 481                                if (so->so_snd.sb_cc)
 482                                        (void) tcp_output(tp);
 483
 484                                return;
 485                        }
 486                } else if (ti->ti_ack == tp->snd_una &&
 487                    tcpfrag_list_empty(tp) &&
 488                    ti->ti_len <= sbspace(&so->so_rcv)) {
 489                        /*
 490                         * this is a pure, in-sequence data packet
 491                         * with nothing on the reassembly queue and
 492                         * we have enough buffer space to take it.
 493                         */
 494                        tp->rcv_nxt += ti->ti_len;
 495                        /*
 496                         * Add data to socket buffer.
 497                         */
 498                        if (so->so_emu) {
 499                                if (tcp_emu(so,m)) sbappend(so, m);
 500                        } else
 501                                sbappend(so, m);
 502
 503                        /*
  504                         * If this is a short packet, then ACK now - with Nagle
 505                         *      congestion avoidance sender won't send more until
 506                         *      he gets an ACK.
 507                         *
 508                         * It is better to not delay acks at all to maximize
 509                         * TCP throughput.  See RFC 2581.
 510                         */
 511                        tp->t_flags |= TF_ACKNOW;
 512                        tcp_output(tp);
 513                        return;
 514                }
 515        } /* header prediction */
 516        /*
 517         * Calculate amount of space in receive window,
 518         * and then do TCP input processing.
 519         * Receive window is amount of space in rcv queue,
 520         * but not less than advertised window.
 521         */
 522        { int win;
 523          win = sbspace(&so->so_rcv);
 524          if (win < 0)
 525            win = 0;
 526          tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt));
 527        }
 528
 529        switch (tp->t_state) {
 530
 531        /*
 532         * If the state is LISTEN then ignore segment if it contains an RST.
 533         * If the segment contains an ACK then it is bad and send a RST.
 534         * If it does not contain a SYN then it is not interesting; drop it.
 535         * Don't bother responding if the destination was a broadcast.
 536         * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
 537         * tp->iss, and send a segment:
 538         *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
 539         * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
 540         * Fill in remote peer address fields if not previously specified.
 541         * Enter SYN_RECEIVED state, and process any other fields of this
 542         * segment in this state.
 543         */
 544        case TCPS_LISTEN: {
 545
 546          if (tiflags & TH_RST)
 547            goto drop;
 548          if (tiflags & TH_ACK)
 549            goto dropwithreset;
 550          if ((tiflags & TH_SYN) == 0)
 551            goto drop;
 552
 553          /*
 554           * This has way too many gotos...
 555           * But a bit of spaghetti code never hurt anybody :)
 556           */
 557
 558          /*
 559           * If this is destined for the control address, then flag to
 560           * tcp_ctl once connected, otherwise connect
 561           */
 562          if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) ==
 563              slirp->vnetwork_addr.s_addr) {
 564            if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr &&
 565                so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) {
 566                /* May be an add exec */
 567                for (ex_ptr = slirp->exec_list; ex_ptr;
 568                     ex_ptr = ex_ptr->ex_next) {
 569                  if(ex_ptr->ex_fport == so->so_fport &&
 570                     so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) {
 571                    so->so_state |= SS_CTL;
 572                    break;
 573                  }
 574                }
 575                if (so->so_state & SS_CTL) {
 576                    goto cont_input;
 577                }
 578            }
 579            /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */
 580          }
 581
 582          if (so->so_emu & EMU_NOCONNECT) {
 583            so->so_emu &= ~EMU_NOCONNECT;
 584            goto cont_input;
 585          }
 586
 587          if((tcp_fconnect(so) == -1) && (errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {
 588            u_char code=ICMP_UNREACH_NET;
 589            DEBUG_MISC((dfd, " tcp fconnect errno = %d-%s\n",
 590                        errno,strerror(errno)));
 591            if(errno == ECONNREFUSED) {
 592              /* ACK the SYN, send RST to refuse the connection */
 593              tcp_respond(tp, ti, m, ti->ti_seq+1, (tcp_seq)0,
 594                          TH_RST|TH_ACK);
 595            } else {
 596              if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST;
 597              HTONL(ti->ti_seq);             /* restore tcp header */
 598              HTONL(ti->ti_ack);
 599              HTONS(ti->ti_win);
 600              HTONS(ti->ti_urp);
 601              m->m_data -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
 602              m->m_len  += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
 603              *ip=save_ip;
 604              icmp_error(m, ICMP_UNREACH,code, 0,strerror(errno));
 605            }
 606            tcp_close(tp);
 607            m_free(m);
 608          } else {
 609            /*
 610             * Haven't connected yet, save the current mbuf
 611             * and ti, and return
 612             * XXX Some OS's don't tell us whether the connect()
 613             * succeeded or not.  So we must time it out.
 614             */
 615            so->so_m = m;
 616            so->so_ti = ti;
 617            tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
 618            tp->t_state = TCPS_SYN_RECEIVED;
 619            tcp_template(tp);
 620          }
 621          return;
 622
 623        cont_conn:
 624          /* m==NULL
 625           * Check if the connect succeeded
 626           */
 627          if (so->so_state & SS_NOFDREF) {
 628            tp = tcp_close(tp);
 629            goto dropwithreset;
 630          }
 631        cont_input:
 632          tcp_template(tp);
 633
 634          if (optp)
 635            tcp_dooptions(tp, (u_char *)optp, optlen, ti);
 636
 637          if (iss)
 638            tp->iss = iss;
 639          else
 640            tp->iss = slirp->tcp_iss;
 641          slirp->tcp_iss += TCP_ISSINCR/2;
 642          tp->irs = ti->ti_seq;
 643          tcp_sendseqinit(tp);
 644          tcp_rcvseqinit(tp);
 645          tp->t_flags |= TF_ACKNOW;
 646          tp->t_state = TCPS_SYN_RECEIVED;
 647          tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
 648          goto trimthenstep6;
 649        } /* case TCPS_LISTEN */
 650
 651        /*
 652         * If the state is SYN_SENT:
 653         *      if seg contains an ACK, but not for our SYN, drop the input.
 654         *      if seg contains a RST, then drop the connection.
 655         *      if seg does not contain SYN, then drop it.
 656         * Otherwise this is an acceptable SYN segment
 657         *      initialize tp->rcv_nxt and tp->irs
 658         *      if seg contains ack then advance tp->snd_una
 659         *      if SYN has been acked change to ESTABLISHED else SYN_RCVD state
 660         *      arrange for segment to be acked (eventually)
 661         *      continue processing rest of data/controls, beginning with URG
 662         */
 663        case TCPS_SYN_SENT:
 664                if ((tiflags & TH_ACK) &&
 665                    (SEQ_LEQ(ti->ti_ack, tp->iss) ||
 666                     SEQ_GT(ti->ti_ack, tp->snd_max)))
 667                        goto dropwithreset;
 668
 669                if (tiflags & TH_RST) {
 670                        if (tiflags & TH_ACK) {
 671                                tcp_drop(tp, 0); /* XXX Check t_softerror! */
 672                        }
 673                        goto drop;
 674                }
 675
 676                if ((tiflags & TH_SYN) == 0)
 677                        goto drop;
 678                if (tiflags & TH_ACK) {
 679                        tp->snd_una = ti->ti_ack;
 680                        if (SEQ_LT(tp->snd_nxt, tp->snd_una))
 681                                tp->snd_nxt = tp->snd_una;
 682                }
 683
 684                tp->t_timer[TCPT_REXMT] = 0;
 685                tp->irs = ti->ti_seq;
 686                tcp_rcvseqinit(tp);
 687                tp->t_flags |= TF_ACKNOW;
 688                if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
 689                        soisfconnected(so);
 690                        tp->t_state = TCPS_ESTABLISHED;
 691
 692                        (void) tcp_reass(tp, (struct tcpiphdr *)0,
 693                                (struct mbuf *)0);
 694                        /*
 695                         * if we didn't have to retransmit the SYN,
 696                         * use its rtt as our initial srtt & rtt var.
 697                         */
 698                        if (tp->t_rtt)
 699                                tcp_xmit_timer(tp, tp->t_rtt);
 700                } else
 701                        tp->t_state = TCPS_SYN_RECEIVED;
 702
 703trimthenstep6:
 704                /*
 705                 * Advance ti->ti_seq to correspond to first data byte.
 706                 * If data, trim to stay within window,
 707                 * dropping FIN if necessary.
 708                 */
 709                ti->ti_seq++;
 710                if (ti->ti_len > tp->rcv_wnd) {
 711                        todrop = ti->ti_len - tp->rcv_wnd;
 712                        m_adj(m, -todrop);
 713                        ti->ti_len = tp->rcv_wnd;
 714                        tiflags &= ~TH_FIN;
 715                }
 716                tp->snd_wl1 = ti->ti_seq - 1;
 717                tp->rcv_up = ti->ti_seq;
 718                goto step6;
 719        } /* switch tp->t_state */
 720        /*
 721         * States other than LISTEN or SYN_SENT.
 722         * Check that at least some bytes of segment are within
 723         * receive window.  If segment begins before rcv_nxt,
 724         * drop leading data (and SYN); if nothing left, just ack.
 725         */
 726        todrop = tp->rcv_nxt - ti->ti_seq;
 727        if (todrop > 0) {
 728                if (tiflags & TH_SYN) {
 729                        tiflags &= ~TH_SYN;
 730                        ti->ti_seq++;
 731                        if (ti->ti_urp > 1)
 732                                ti->ti_urp--;
 733                        else
 734                                tiflags &= ~TH_URG;
 735                        todrop--;
 736                }
 737                /*
 738                 * Following if statement from Stevens, vol. 2, p. 960.
 739                 */
 740                if (todrop > ti->ti_len
 741                    || (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) {
 742                        /*
 743                         * Any valid FIN must be to the left of the window.
 744                         * At this point the FIN must be a duplicate or out
 745                         * of sequence; drop it.
 746                         */
 747                        tiflags &= ~TH_FIN;
 748
 749                        /*
 750                         * Send an ACK to resynchronize and drop any data.
 751                         * But keep on processing for RST or ACK.
 752                         */
 753                        tp->t_flags |= TF_ACKNOW;
 754                        todrop = ti->ti_len;
 755                }
 756                m_adj(m, todrop);
 757                ti->ti_seq += todrop;
 758                ti->ti_len -= todrop;
 759                if (ti->ti_urp > todrop)
 760                        ti->ti_urp -= todrop;
 761                else {
 762                        tiflags &= ~TH_URG;
 763                        ti->ti_urp = 0;
 764                }
 765        }
 766        /*
 767         * If new data are received on a connection after the
 768         * user processes are gone, then RST the other end.
 769         */
 770        if ((so->so_state & SS_NOFDREF) &&
 771            tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
 772                tp = tcp_close(tp);
 773                goto dropwithreset;
 774        }
 775
 776        /*
 777         * If segment ends after window, drop trailing data
 778         * (and PUSH and FIN); if nothing left, just ACK.
 779         */
 780        todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
 781        if (todrop > 0) {
 782                if (todrop >= ti->ti_len) {
 783                        /*
 784                         * If a new connection request is received
 785                         * while in TIME_WAIT, drop the old connection
 786                         * and start over if the sequence numbers
 787                         * are above the previous ones.
 788                         */
 789                        if (tiflags & TH_SYN &&
 790                            tp->t_state == TCPS_TIME_WAIT &&
 791                            SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
 792                                iss = tp->rcv_nxt + TCP_ISSINCR;
 793                                tp = tcp_close(tp);
 794                                goto findso;
 795                        }
 796                        /*
 797                         * If window is closed can only take segments at
 798                         * window edge, and have to drop data and PUSH from
 799                         * incoming segments.  Continue processing, but
 800                         * remember to ack.  Otherwise, drop segment
 801                         * and ack.
 802                         */
 803                        if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
 804                                tp->t_flags |= TF_ACKNOW;
 805                        } else {
 806                                goto dropafterack;
 807                        }
 808                }
 809                m_adj(m, -todrop);
 810                ti->ti_len -= todrop;
 811                tiflags &= ~(TH_PUSH|TH_FIN);
 812        }
 813
 814        /*
 815         * If the RST bit is set examine the state:
 816         *    SYN_RECEIVED STATE:
 817         *      If passive open, return to LISTEN state.
 818         *      If active open, inform user that connection was refused.
 819         *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
 820         *      Inform user that connection was reset, and close tcb.
 821         *    CLOSING, LAST_ACK, TIME_WAIT STATES
 822         *      Close the tcb.
 823         */
 824        if (tiflags&TH_RST) switch (tp->t_state) {
 825
 826        case TCPS_SYN_RECEIVED:
 827        case TCPS_ESTABLISHED:
 828        case TCPS_FIN_WAIT_1:
 829        case TCPS_FIN_WAIT_2:
 830        case TCPS_CLOSE_WAIT:
 831                tp->t_state = TCPS_CLOSED;
 832                tcp_close(tp);
 833                goto drop;
 834
 835        case TCPS_CLOSING:
 836        case TCPS_LAST_ACK:
 837        case TCPS_TIME_WAIT:
 838                tcp_close(tp);
 839                goto drop;
 840        }
 841
 842        /*
 843         * If a SYN is in the window, then this is an
 844         * error and we send an RST and drop the connection.
 845         */
 846        if (tiflags & TH_SYN) {
 847                tp = tcp_drop(tp,0);
 848                goto dropwithreset;
 849        }
 850
 851        /*
 852         * If the ACK bit is off we drop the segment and return.
 853         */
 854        if ((tiflags & TH_ACK) == 0) goto drop;
 855
 856        /*
 857         * Ack processing.
 858         */
 859        switch (tp->t_state) {
 860        /*
 861         * In SYN_RECEIVED state if the ack ACKs our SYN then enter
 862         * ESTABLISHED state and continue processing, otherwise
 863         * send an RST.  una<=ack<=max
 864         */
 865        case TCPS_SYN_RECEIVED:
 866
 867                if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
 868                    SEQ_GT(ti->ti_ack, tp->snd_max))
 869                        goto dropwithreset;
 870                tp->t_state = TCPS_ESTABLISHED;
 871                /*
 872                 * The sent SYN is ack'ed with our sequence number +1
 873                 * The first data byte already in the buffer will get
 874                 * lost if no correction is made.  This is only needed for
 875                 * SS_CTL since the buffer is empty otherwise.
 876                 * tp->snd_una++; or:
 877                 */
 878                tp->snd_una=ti->ti_ack;
 879                if (so->so_state & SS_CTL) {
 880                  /* So tcp_ctl reports the right state */
 881                  ret = tcp_ctl(so);
 882                  if (ret == 1) {
 883                    soisfconnected(so);
 884                    so->so_state &= ~SS_CTL;   /* success XXX */
 885                  } else if (ret == 2) {
 886                    so->so_state &= SS_PERSISTENT_MASK;
 887                    so->so_state |= SS_NOFDREF; /* CTL_CMD */
 888                  } else {
 889                    needoutput = 1;
 890                    tp->t_state = TCPS_FIN_WAIT_1;
 891                  }
 892                } else {
 893                  soisfconnected(so);
 894                }
 895
 896                (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
 897                tp->snd_wl1 = ti->ti_seq - 1;
 898                /* Avoid ack processing; snd_una==ti_ack  =>  dup ack */
 899                goto synrx_to_est;
 900                /* fall into ... */
 901
 902        /*
 903         * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
 904         * ACKs.  If the ack is in the range
 905         *      tp->snd_una < ti->ti_ack <= tp->snd_max
 906         * then advance tp->snd_una to ti->ti_ack and drop
 907         * data from the retransmission queue.  If this ACK reflects
 908         * more up to date window information we update our window information.
 909         */
 910        case TCPS_ESTABLISHED:
 911        case TCPS_FIN_WAIT_1:
 912        case TCPS_FIN_WAIT_2:
 913        case TCPS_CLOSE_WAIT:
 914        case TCPS_CLOSING:
 915        case TCPS_LAST_ACK:
 916        case TCPS_TIME_WAIT:
 917
 918                if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
 919                        if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
 920                          DEBUG_MISC((dfd, " dup ack  m = %lx  so = %lx\n",
 921                                      (long )m, (long )so));
 922                                /*
 923                                 * If we have outstanding data (other than
 924                                 * a window probe), this is a completely
 925                                 * duplicate ack (ie, window info didn't
 926                                 * change), the ack is the biggest we've
 927                                 * seen and we've seen exactly our rexmt
 928                                 * threshold of them, assume a packet
 929                                 * has been dropped and retransmit it.
 930                                 * Kludge snd_nxt & the congestion
 931                                 * window so we send only this one
 932                                 * packet.
 933                                 *
 934                                 * We know we're losing at the current
 935                                 * window size so do congestion avoidance
 936                                 * (set ssthresh to half the current window
 937                                 * and pull our congestion window back to
 938                                 * the new ssthresh).
 939                                 *
 940                                 * Dup acks mean that packets have left the
 941                                 * network (they're now cached at the receiver)
 942                                 * so bump cwnd by the amount in the receiver
 943                                 * to keep a constant cwnd packets in the
 944                                 * network.
 945                                 */
 946                                if (tp->t_timer[TCPT_REXMT] == 0 ||
 947                                    ti->ti_ack != tp->snd_una)
 948                                        tp->t_dupacks = 0;
 949                                else if (++tp->t_dupacks == TCPREXMTTHRESH) {
 950                                        tcp_seq onxt = tp->snd_nxt;
 951                                        u_int win =
 952                                            min(tp->snd_wnd, tp->snd_cwnd) / 2 /
 953                                                tp->t_maxseg;
 954
 955                                        if (win < 2)
 956                                                win = 2;
 957                                        tp->snd_ssthresh = win * tp->t_maxseg;
 958                                        tp->t_timer[TCPT_REXMT] = 0;
 959                                        tp->t_rtt = 0;
 960                                        tp->snd_nxt = ti->ti_ack;
 961                                        tp->snd_cwnd = tp->t_maxseg;
 962                                        (void) tcp_output(tp);
 963                                        tp->snd_cwnd = tp->snd_ssthresh +
 964                                               tp->t_maxseg * tp->t_dupacks;
 965                                        if (SEQ_GT(onxt, tp->snd_nxt))
 966                                                tp->snd_nxt = onxt;
 967                                        goto drop;
 968                                } else if (tp->t_dupacks > TCPREXMTTHRESH) {
 969                                        tp->snd_cwnd += tp->t_maxseg;
 970                                        (void) tcp_output(tp);
 971                                        goto drop;
 972                                }
 973                        } else
 974                                tp->t_dupacks = 0;
 975                        break;
 976                }
 977        synrx_to_est:
 978                /*
 979                 * If the congestion window was inflated to account
 980                 * for the other side's cached packets, retract it.
 981                 */
 982                if (tp->t_dupacks > TCPREXMTTHRESH &&
 983                    tp->snd_cwnd > tp->snd_ssthresh)
 984                        tp->snd_cwnd = tp->snd_ssthresh;
 985                tp->t_dupacks = 0;
 986                if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
 987                        goto dropafterack;
 988                }
 989                acked = ti->ti_ack - tp->snd_una;
 990
 991                /*
 992                 * If transmit timer is running and timed sequence
 993                 * number was acked, update smoothed round trip time.
 994                 * Since we now have an rtt measurement, cancel the
 995                 * timer backoff (cf., Phil Karn's retransmit alg.).
 996                 * Recompute the initial retransmit timer.
 997                 */
 998                if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
 999                        tcp_xmit_timer(tp,tp->t_rtt);
1000
1001                /*
1002                 * If all outstanding data is acked, stop retransmit
1003                 * timer and remember to restart (more output or persist).
1004                 * If there is more data to be acked, restart retransmit
1005                 * timer, using current (possibly backed-off) value.
1006                 */
1007                if (ti->ti_ack == tp->snd_max) {
1008                        tp->t_timer[TCPT_REXMT] = 0;
1009                        needoutput = 1;
1010                } else if (tp->t_timer[TCPT_PERSIST] == 0)
1011                        tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
1012                /*
1013                 * When new data is acked, open the congestion window.
1014                 * If the window gives us less than ssthresh packets
1015                 * in flight, open exponentially (maxseg per packet).
1016                 * Otherwise open linearly: maxseg per window
1017                 * (maxseg^2 / cwnd per packet).
1018                 */
1019                {
1020                  register u_int cw = tp->snd_cwnd;
1021                  register u_int incr = tp->t_maxseg;
1022
1023                  if (cw > tp->snd_ssthresh)
1024                    incr = incr * incr / cw;
1025                  tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
1026                }
1027                if (acked > so->so_snd.sb_cc) {
1028                        tp->snd_wnd -= so->so_snd.sb_cc;
1029                        sbdrop(&so->so_snd, (int )so->so_snd.sb_cc);
1030                        ourfinisacked = 1;
1031                } else {
1032                        sbdrop(&so->so_snd, acked);
1033                        tp->snd_wnd -= acked;
1034                        ourfinisacked = 0;
1035                }
1036                tp->snd_una = ti->ti_ack;
1037                if (SEQ_LT(tp->snd_nxt, tp->snd_una))
1038                        tp->snd_nxt = tp->snd_una;
1039
1040                switch (tp->t_state) {
1041
1042                /*
1043                 * In FIN_WAIT_1 STATE in addition to the processing
1044                 * for the ESTABLISHED state if our FIN is now acknowledged
1045                 * then enter FIN_WAIT_2.
1046                 */
1047                case TCPS_FIN_WAIT_1:
1048                        if (ourfinisacked) {
1049                                /*
1050                                 * If we can't receive any more
1051                                 * data, then closing user can proceed.
1052                                 * Starting the timer is contrary to the
1053                                 * specification, but if we don't get a FIN
1054                                 * we'll hang forever.
1055                                 */
1056                                if (so->so_state & SS_FCANTRCVMORE) {
1057                                        tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE;
1058                                }
1059                                tp->t_state = TCPS_FIN_WAIT_2;
1060                        }
1061                        break;
1062
1063                /*
1064                 * In CLOSING STATE in addition to the processing for
1065                 * the ESTABLISHED state if the ACK acknowledges our FIN
1066                 * then enter the TIME-WAIT state, otherwise ignore
1067                 * the segment.
1068                 */
1069                case TCPS_CLOSING:
1070                        if (ourfinisacked) {
1071                                tp->t_state = TCPS_TIME_WAIT;
1072                                tcp_canceltimers(tp);
1073                                tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1074                        }
1075                        break;
1076
1077                /*
1078                 * In LAST_ACK, we may still be waiting for data to drain
1079                 * and/or to be acked, as well as for the ack of our FIN.
1080                 * If our FIN is now acknowledged, delete the TCB,
1081                 * enter the closed state and return.
1082                 */
1083                case TCPS_LAST_ACK:
1084                        if (ourfinisacked) {
1085                                tcp_close(tp);
1086                                goto drop;
1087                        }
1088                        break;
1089
1090                /*
1091                 * In TIME_WAIT state the only thing that should arrive
1092                 * is a retransmission of the remote FIN.  Acknowledge
1093                 * it and restart the finack timer.
1094                 */
1095                case TCPS_TIME_WAIT:
1096                        tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1097                        goto dropafterack;
1098                }
1099        } /* switch(tp->t_state) */
1100
1101step6:
1102        /*
1103         * Update window information.
1104         * Don't look at window if no ACK: TAC's send garbage on first SYN.
1105         */
1106        if ((tiflags & TH_ACK) &&
1107            (SEQ_LT(tp->snd_wl1, ti->ti_seq) ||
1108            (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
1109            (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) {
1110                tp->snd_wnd = tiwin;
1111                tp->snd_wl1 = ti->ti_seq;
1112                tp->snd_wl2 = ti->ti_ack;
1113                if (tp->snd_wnd > tp->max_sndwnd)
1114                        tp->max_sndwnd = tp->snd_wnd;
1115                needoutput = 1;
1116        }
1117
1118        /*
1119         * Process segments with URG.
1120         */
1121        if ((tiflags & TH_URG) && ti->ti_urp &&
1122            TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1123                /*
1124                 * This is a kludge, but if we receive and accept
1125                 * random urgent pointers, we'll crash in
1126                 * soreceive.  It's hard to imagine someone
1127                 * actually wanting to send this much urgent data.
1128                 */
1129                if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) {
1130                        ti->ti_urp = 0;
1131                        tiflags &= ~TH_URG;
1132                        goto dodata;
1133                }
1134                /*
1135                 * If this segment advances the known urgent pointer,
1136                 * then mark the data stream.  This should not happen
1137                 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
1138                 * a FIN has been received from the remote side.
1139                 * In these states we ignore the URG.
1140                 *
1141                 * According to RFC961 (Assigned Protocols),
1142                 * the urgent pointer points to the last octet
1143                 * of urgent data.  We continue, however,
1144                 * to consider it to indicate the first octet
1145                 * of data past the urgent section as the original
1146                 * spec states (in one of two places).
1147                 */
1148                if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
1149                        tp->rcv_up = ti->ti_seq + ti->ti_urp;
1150                        so->so_urgc =  so->so_rcv.sb_cc +
1151                                (tp->rcv_up - tp->rcv_nxt); /* -1; */
1152                        tp->rcv_up = ti->ti_seq + ti->ti_urp;
1153
1154                }
1155        } else
1156                /*
1157                 * If no out of band data is expected,
1158                 * pull receive urgent pointer along
1159                 * with the receive window.
1160                 */
1161                if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
1162                        tp->rcv_up = tp->rcv_nxt;
1163dodata:
1164
1165        /*
1166         * If this is a small packet, then ACK now - with Nagle
1167         *      congestion avoidance sender won't send more until
1168         *      he gets an ACK.
1169         */
1170        if (ti->ti_len && (unsigned)ti->ti_len <= 5 &&
1171            ((struct tcpiphdr_2 *)ti)->first_char == (char)27) {
1172                tp->t_flags |= TF_ACKNOW;
1173        }
1174
1175        /*
1176         * Process the segment text, merging it into the TCP sequencing queue,
1177         * and arranging for acknowledgment of receipt if necessary.
1178         * This process logically involves adjusting tp->rcv_wnd as data
1179         * is presented to the user (this happens in tcp_usrreq.c,
1180         * case PRU_RCVD).  If a FIN has already been received on this
1181         * connection then we just ignore the text.
1182         */
1183        if ((ti->ti_len || (tiflags&TH_FIN)) &&
1184            TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1185                TCP_REASS(tp, ti, m, so, tiflags);
1186        } else {
1187                m_free(m);
1188                tiflags &= ~TH_FIN;
1189        }
1190
1191        /*
1192         * If FIN is received ACK the FIN and let the user know
1193         * that the connection is closing.
1194         */
1195        if (tiflags & TH_FIN) {
1196                if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1197                        /*
1198                         * If we receive a FIN we can't send more data,
1199                         * set it SS_FDRAIN
1200                         * Shutdown the socket if there is no rx data in the
1201                         * buffer.
1202                         * soread() is called on completion of shutdown() and
1203                         * will go to TCPS_LAST_ACK, and use tcp_output()
1204                         * to send the FIN.
1205                         */
1206                        sofwdrain(so);
1207
1208                        tp->t_flags |= TF_ACKNOW;
1209                        tp->rcv_nxt++;
1210                }
1211                switch (tp->t_state) {
1212
1213                /*
1214                 * In SYN_RECEIVED and ESTABLISHED STATES
1215                 * enter the CLOSE_WAIT state.
1216                 */
1217                case TCPS_SYN_RECEIVED:
1218                case TCPS_ESTABLISHED:
1219                  if(so->so_emu == EMU_CTL)        /* no shutdown on socket */
1220                    tp->t_state = TCPS_LAST_ACK;
1221                  else
1222                    tp->t_state = TCPS_CLOSE_WAIT;
1223                  break;
1224
1225                /*
1226                 * If still in FIN_WAIT_1 STATE FIN has not been acked so
1227                 * enter the CLOSING state.
1228                 */
1229                case TCPS_FIN_WAIT_1:
1230                        tp->t_state = TCPS_CLOSING;
1231                        break;
1232
1233                /*
1234                 * In FIN_WAIT_2 state enter the TIME_WAIT state,
1235                 * starting the time-wait timer, turning off the other
1236                 * standard timers.
1237                 */
1238                case TCPS_FIN_WAIT_2:
1239                        tp->t_state = TCPS_TIME_WAIT;
1240                        tcp_canceltimers(tp);
1241                        tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1242                        break;
1243
1244                /*
1245                 * In TIME_WAIT state restart the 2 MSL time_wait timer.
1246                 */
1247                case TCPS_TIME_WAIT:
1248                        tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1249                        break;
1250                }
1251        }
1252
1253        /*
1254         * Return any desired output.
1255         */
1256        if (needoutput || (tp->t_flags & TF_ACKNOW)) {
1257                (void) tcp_output(tp);
1258        }
1259        return;
1260
1261dropafterack:
1262        /*
1263         * Generate an ACK dropping incoming segment if it occupies
1264         * sequence space, where the ACK reflects our state.
1265         */
1266        if (tiflags & TH_RST)
1267                goto drop;
1268        m_free(m);
1269        tp->t_flags |= TF_ACKNOW;
1270        (void) tcp_output(tp);
1271        return;
1272
1273dropwithreset:
1274        /* reuses m if m!=NULL, m_free() unnecessary */
1275        if (tiflags & TH_ACK)
1276                tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
1277        else {
1278                if (tiflags & TH_SYN) ti->ti_len++;
1279                tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
1280                    TH_RST|TH_ACK);
1281        }
1282
1283        return;
1284
1285drop:
1286        /*
1287         * Drop space held by incoming segment and return.
1288         */
1289        m_free(m);
1290}
1291
1292static void
1293tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt, struct tcpiphdr *ti)
1294{
1295        uint16_t mss;
1296        int opt, optlen;
1297
1298        DEBUG_CALL("tcp_dooptions");
1299        DEBUG_ARGS((dfd, " tp = %lx  cnt=%i\n", (long)tp, cnt));
1300
1301        for (; cnt > 0; cnt -= optlen, cp += optlen) {
1302                opt = cp[0];
1303                if (opt == TCPOPT_EOL)
1304                        break;
1305                if (opt == TCPOPT_NOP)
1306                        optlen = 1;
1307                else {
1308                        optlen = cp[1];
1309                        if (optlen <= 0)
1310                                break;
1311                }
1312                switch (opt) {
1313
1314                default:
1315                        continue;
1316
1317                case TCPOPT_MAXSEG:
1318                        if (optlen != TCPOLEN_MAXSEG)
1319                                continue;
1320                        if (!(ti->ti_flags & TH_SYN))
1321                                continue;
1322                        memcpy((char *) &mss, (char *) cp + 2, sizeof(mss));
1323                        NTOHS(mss);
1324                        (void) tcp_mss(tp, mss);        /* sets t_maxseg */
1325                        break;
1326                }
1327        }
1328}
1329
1330
1331/*
1332 * Pull out of band byte out of a segment so
1333 * it doesn't appear in the user's data queue.
1334 * It is still reflected in the segment length for
1335 * sequencing purposes.
1336 */
1337
1338#ifdef notdef
1339
1340void
1341tcp_pulloutofband(so, ti, m)
1342        struct socket *so;
1343        struct tcpiphdr *ti;
1344        register struct mbuf *m;
1345{
1346        int cnt = ti->ti_urp - 1;
1347
1348        while (cnt >= 0) {
1349                if (m->m_len > cnt) {
1350                        char *cp = mtod(m, caddr_t) + cnt;
1351                        struct tcpcb *tp = sototcpcb(so);
1352
1353                        tp->t_iobc = *cp;
1354                        tp->t_oobflags |= TCPOOB_HAVEDATA;
1355                        memcpy(sp, cp+1, (unsigned)(m->m_len - cnt - 1));
1356                        m->m_len--;
1357                        return;
1358                }
1359                cnt -= m->m_len;
1360                m = m->m_next; /* XXX WRONG! Fix it! */
1361                if (m == 0)
1362                        break;
1363        }
1364        panic("tcp_pulloutofband");
1365}
1366
1367#endif /* notdef */
1368
1369/*
1370 * Collect new round-trip time estimate
1371 * and update averages and current timeout.
1372 */
1373
1374static void
1375tcp_xmit_timer(register struct tcpcb *tp, int rtt)
1376{
1377        register short delta;
1378
1379        DEBUG_CALL("tcp_xmit_timer");
1380        DEBUG_ARG("tp = %lx", (long)tp);
1381        DEBUG_ARG("rtt = %d", rtt);
1382
1383        if (tp->t_srtt != 0) {
1384                /*
1385                 * srtt is stored as fixed point with 3 bits after the
1386                 * binary point (i.e., scaled by 8).  The following magic
1387                 * is equivalent to the smoothing algorithm in rfc793 with
1388                 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
1389                 * point).  Adjust rtt to origin 0.
1390                 */
1391                delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT);
1392                if ((tp->t_srtt += delta) <= 0)
1393                        tp->t_srtt = 1;
1394                /*
1395                 * We accumulate a smoothed rtt variance (actually, a
1396                 * smoothed mean difference), then set the retransmit
1397                 * timer to smoothed rtt + 4 times the smoothed variance.
1398                 * rttvar is stored as fixed point with 2 bits after the
1399                 * binary point (scaled by 4).  The following is
1400                 * equivalent to rfc793 smoothing with an alpha of .75
1401                 * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
1402                 * rfc793's wired-in beta.
1403                 */
1404                if (delta < 0)
1405                        delta = -delta;
1406                delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
1407                if ((tp->t_rttvar += delta) <= 0)
1408                        tp->t_rttvar = 1;
1409        } else {
1410                /*
1411                 * No rtt measurement yet - use the unsmoothed rtt.
1412                 * Set the variance to half the rtt (so our first
1413                 * retransmit happens at 3*rtt).
1414                 */
1415                tp->t_srtt = rtt << TCP_RTT_SHIFT;
1416                tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
1417        }
1418        tp->t_rtt = 0;
1419        tp->t_rxtshift = 0;
1420
1421        /*
1422         * the retransmit should happen at rtt + 4 * rttvar.
1423         * Because of the way we do the smoothing, srtt and rttvar
1424         * will each average +1/2 tick of bias.  When we compute
1425         * the retransmit timer, we want 1/2 tick of rounding and
1426         * 1 extra tick because of +-1/2 tick uncertainty in the
1427         * firing of the timer.  The bias will give us exactly the
1428         * 1.5 tick we need.  But, because the bias is
1429         * statistical, we have to test that we don't drop below
1430         * the minimum feasible timer (which is 2 ticks).
1431         */
1432        TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
1433            (short)tp->t_rttmin, TCPTV_REXMTMAX); /* XXX */
1434
1435        /*
1436         * We received an ack for a packet that wasn't retransmitted;
1437         * it is probably safe to discard any error indications we've
1438         * received recently.  This isn't quite right, but close enough
1439         * for now (a route might have failed after we sent a segment,
1440         * and the return path might not be symmetrical).
1441         */
1442        tp->t_softerror = 0;
1443}
1444
1445/*
1446 * Determine a reasonable value for maxseg size.
1447 * If the route is known, check route for mtu.
1448 * If none, use an mss that can be handled on the outgoing
1449 * interface without forcing IP to fragment; if bigger than
1450 * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
1451 * to utilize large mbufs.  If no route is found, route has no mtu,
1452 * or the destination isn't local, use a default, hopefully conservative
1453 * size (usually 512 or the default IP max size, but no more than the mtu
1454 * of the interface), as we can't discover anything about intervening
1455 * gateways or networks.  We also initialize the congestion/slow start
1456 * window to be a single segment if the destination isn't local.
1457 * While looking at the routing entry, we also initialize other path-dependent
1458 * parameters from pre-set or cached values in the routing entry.
1459 */
1460
1461int
1462tcp_mss(struct tcpcb *tp, u_int offer)
1463{
1464        struct socket *so = tp->t_socket;
1465        int mss;
1466
1467        DEBUG_CALL("tcp_mss");
1468        DEBUG_ARG("tp = %lx", (long)tp);
1469        DEBUG_ARG("offer = %d", offer);
1470
1471        mss = min(IF_MTU, IF_MRU) - sizeof(struct tcpiphdr);
1472        if (offer)
1473                mss = min(mss, offer);
1474        mss = max(mss, 32);
1475        if (mss < tp->t_maxseg || offer != 0)
1476           tp->t_maxseg = mss;
1477
1478        tp->snd_cwnd = mss;
1479
1480        sbreserve(&so->so_snd, TCP_SNDSPACE + ((TCP_SNDSPACE % mss) ?
1481                                               (mss - (TCP_SNDSPACE % mss)) :
1482                                               0));
1483        sbreserve(&so->so_rcv, TCP_RCVSPACE + ((TCP_RCVSPACE % mss) ?
1484                                               (mss - (TCP_RCVSPACE % mss)) :
1485                                               0));
1486
1487        DEBUG_MISC((dfd, " returning mss = %d\n", mss));
1488
1489        return mss;
1490}
1491