qemu/slirp/socket.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 1995 Danny Gasparovski.
   3 *
   4 * Please read the file COPYRIGHT for the
   5 * terms and conditions of the copyright.
   6 */
   7
   8#include "qemu/osdep.h"
   9#include "qemu-common.h"
  10#include "slirp.h"
  11#include "ip_icmp.h"
  12#ifdef __sun__
  13#include <sys/filio.h>
  14#endif
  15
  16static void sofcantrcvmore(struct socket *so);
  17static void sofcantsendmore(struct socket *so);
  18
  19struct socket *solookup(struct socket **last, struct socket *head,
  20        struct sockaddr_storage *lhost, struct sockaddr_storage *fhost)
  21{
  22    struct socket *so = *last;
  23
  24    /* Optimisation */
  25    if (so != head && sockaddr_equal(&(so->lhost.ss), lhost)
  26            && (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) {
  27        return so;
  28    }
  29
  30    for (so = head->so_next; so != head; so = so->so_next) {
  31        if (sockaddr_equal(&(so->lhost.ss), lhost)
  32                && (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) {
  33            *last = so;
  34            return so;
  35        }
  36    }
  37
  38    return (struct socket *)NULL;
  39}
  40
  41/*
  42 * Create a new socket, initialise the fields
  43 * It is the responsibility of the caller to
  44 * insque() it into the correct linked-list
  45 */
  46struct socket *
  47socreate(Slirp *slirp)
  48{
  49    struct socket *so = g_new(struct socket, 1);
  50
  51    memset(so, 0, sizeof(struct socket));
  52    so->so_state = SS_NOFDREF;
  53    so->s = -1;
  54    so->slirp = slirp;
  55    so->pollfds_idx = -1;
  56
  57    return so;
  58}
  59
  60/*
  61 * Remove references to so from the given message queue.
  62 */
  63static void
  64soqfree(struct socket *so, struct quehead *qh)
  65{
  66    struct mbuf *ifq;
  67
  68    for (ifq = (struct mbuf *) qh->qh_link;
  69             (struct quehead *) ifq != qh;
  70             ifq = ifq->ifq_next) {
  71        if (ifq->ifq_so == so) {
  72            struct mbuf *ifm;
  73            ifq->ifq_so = NULL;
  74            for (ifm = ifq->ifs_next; ifm != ifq; ifm = ifm->ifs_next) {
  75                ifm->ifq_so = NULL;
  76            }
  77        }
  78    }
  79}
  80
  81/*
  82 * remque and free a socket, clobber cache
  83 */
  84void
  85sofree(struct socket *so)
  86{
  87  Slirp *slirp = so->slirp;
  88
  89  soqfree(so, &slirp->if_fastq);
  90  soqfree(so, &slirp->if_batchq);
  91
  92  if (so->so_emu==EMU_RSH && so->extra) {
  93        sofree(so->extra);
  94        so->extra=NULL;
  95  }
  96  if (so == slirp->tcp_last_so) {
  97      slirp->tcp_last_so = &slirp->tcb;
  98  } else if (so == slirp->udp_last_so) {
  99      slirp->udp_last_so = &slirp->udb;
 100  } else if (so == slirp->icmp_last_so) {
 101      slirp->icmp_last_so = &slirp->icmp;
 102  }
 103  m_free(so->so_m);
 104
 105  if(so->so_next && so->so_prev)
 106    remque(so);  /* crashes if so is not in a queue */
 107
 108  if (so->so_tcpcb) {
 109      free(so->so_tcpcb);
 110  }
 111  g_free(so);
 112}
 113
 114size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np)
 115{
 116        int n, lss, total;
 117        struct sbuf *sb = &so->so_snd;
 118        int len = sb->sb_datalen - sb->sb_cc;
 119        int mss = so->so_tcpcb->t_maxseg;
 120
 121        DEBUG_CALL("sopreprbuf");
 122        DEBUG_ARG("so = %p", so);
 123
 124        if (len <= 0)
 125                return 0;
 126
 127        iov[0].iov_base = sb->sb_wptr;
 128        iov[1].iov_base = NULL;
 129        iov[1].iov_len = 0;
 130        if (sb->sb_wptr < sb->sb_rptr) {
 131                iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
 132                /* Should never succeed, but... */
 133                if (iov[0].iov_len > len)
 134                   iov[0].iov_len = len;
 135                if (iov[0].iov_len > mss)
 136                   iov[0].iov_len -= iov[0].iov_len%mss;
 137                n = 1;
 138        } else {
 139                iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
 140                /* Should never succeed, but... */
 141                if (iov[0].iov_len > len) iov[0].iov_len = len;
 142                len -= iov[0].iov_len;
 143                if (len) {
 144                        iov[1].iov_base = sb->sb_data;
 145                        iov[1].iov_len = sb->sb_rptr - sb->sb_data;
 146                        if(iov[1].iov_len > len)
 147                           iov[1].iov_len = len;
 148                        total = iov[0].iov_len + iov[1].iov_len;
 149                        if (total > mss) {
 150                                lss = total%mss;
 151                                if (iov[1].iov_len > lss) {
 152                                        iov[1].iov_len -= lss;
 153                                        n = 2;
 154                                } else {
 155                                        lss -= iov[1].iov_len;
 156                                        iov[0].iov_len -= lss;
 157                                        n = 1;
 158                                }
 159                        } else
 160                                n = 2;
 161                } else {
 162                        if (iov[0].iov_len > mss)
 163                           iov[0].iov_len -= iov[0].iov_len%mss;
 164                        n = 1;
 165                }
 166        }
 167        if (np)
 168                *np = n;
 169
 170        return iov[0].iov_len + (n - 1) * iov[1].iov_len;
 171}
 172
 173/*
 174 * Read from so's socket into sb_snd, updating all relevant sbuf fields
 175 * NOTE: This will only be called if it is select()ed for reading, so
 176 * a read() of 0 (or less) means it's disconnected
 177 */
 178int
 179soread(struct socket *so)
 180{
 181        int n, nn;
 182        struct sbuf *sb = &so->so_snd;
 183        struct iovec iov[2];
 184
 185        DEBUG_CALL("soread");
 186        DEBUG_ARG("so = %p", so);
 187
 188        /*
 189         * No need to check if there's enough room to read.
 190         * soread wouldn't have been called if there weren't
 191         */
 192        sopreprbuf(so, iov, &n);
 193
 194#ifdef HAVE_READV
 195        nn = readv(so->s, (struct iovec *)iov, n);
 196        DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
 197#else
 198        nn = qemu_recv(so->s, iov[0].iov_base, iov[0].iov_len,0);
 199#endif
 200        if (nn <= 0) {
 201                if (nn < 0 && (errno == EINTR || errno == EAGAIN))
 202                        return 0;
 203                else {
 204                        int err;
 205                        socklen_t elen = sizeof err;
 206                        struct sockaddr_storage addr;
 207                        struct sockaddr *paddr = (struct sockaddr *) &addr;
 208                        socklen_t alen = sizeof addr;
 209
 210                        err = errno;
 211                        if (nn == 0) {
 212                                if (getpeername(so->s, paddr, &alen) < 0) {
 213                                        err = errno;
 214                                } else {
 215                                        getsockopt(so->s, SOL_SOCKET, SO_ERROR,
 216                                                &err, &elen);
 217                                }
 218                        }
 219
 220                        DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n", nn, errno,strerror(errno)));
 221                        sofcantrcvmore(so);
 222
 223                        if (err == ECONNRESET || err == ECONNREFUSED
 224                            || err == ENOTCONN || err == EPIPE) {
 225                                tcp_drop(sototcpcb(so), err);
 226                        } else {
 227                                tcp_sockclosed(sototcpcb(so));
 228                        }
 229                        return -1;
 230                }
 231        }
 232
 233#ifndef HAVE_READV
 234        /*
 235         * If there was no error, try and read the second time round
 236         * We read again if n = 2 (ie, there's another part of the buffer)
 237         * and we read as much as we could in the first read
 238         * We don't test for <= 0 this time, because there legitimately
 239         * might not be any more data (since the socket is non-blocking),
 240         * a close will be detected on next iteration.
 241         * A return of -1 won't (shouldn't) happen, since it didn't happen above
 242         */
 243        if (n == 2 && nn == iov[0].iov_len) {
 244            int ret;
 245            ret = qemu_recv(so->s, iov[1].iov_base, iov[1].iov_len,0);
 246            if (ret > 0)
 247                nn += ret;
 248        }
 249
 250        DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
 251#endif
 252
 253        /* Update fields */
 254        sb->sb_cc += nn;
 255        sb->sb_wptr += nn;
 256        if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
 257                sb->sb_wptr -= sb->sb_datalen;
 258        return nn;
 259}
 260
 261int soreadbuf(struct socket *so, const char *buf, int size)
 262{
 263    int n, nn, copy = size;
 264        struct sbuf *sb = &so->so_snd;
 265        struct iovec iov[2];
 266
 267        DEBUG_CALL("soreadbuf");
 268        DEBUG_ARG("so = %p", so);
 269
 270        /*
 271         * No need to check if there's enough room to read.
 272         * soread wouldn't have been called if there weren't
 273         */
 274        if (sopreprbuf(so, iov, &n) < size)
 275        goto err;
 276
 277    nn = MIN(iov[0].iov_len, copy);
 278    memcpy(iov[0].iov_base, buf, nn);
 279
 280    copy -= nn;
 281    buf += nn;
 282
 283    if (copy == 0)
 284        goto done;
 285
 286    memcpy(iov[1].iov_base, buf, copy);
 287
 288done:
 289    /* Update fields */
 290        sb->sb_cc += size;
 291        sb->sb_wptr += size;
 292        if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
 293                sb->sb_wptr -= sb->sb_datalen;
 294    return size;
 295err:
 296
 297    sofcantrcvmore(so);
 298    tcp_sockclosed(sototcpcb(so));
 299    fprintf(stderr, "soreadbuf buffer to small");
 300    return -1;
 301}
 302
 303/*
 304 * Get urgent data
 305 *
 306 * When the socket is created, we set it SO_OOBINLINE,
 307 * so when OOB data arrives, we soread() it and everything
 308 * in the send buffer is sent as urgent data
 309 */
 310int
 311sorecvoob(struct socket *so)
 312{
 313        struct tcpcb *tp = sototcpcb(so);
 314        int ret;
 315
 316        DEBUG_CALL("sorecvoob");
 317        DEBUG_ARG("so = %p", so);
 318
 319        /*
 320         * We take a guess at how much urgent data has arrived.
 321         * In most situations, when urgent data arrives, the next
 322         * read() should get all the urgent data.  This guess will
 323         * be wrong however if more data arrives just after the
 324         * urgent data, or the read() doesn't return all the
 325         * urgent data.
 326         */
 327        ret = soread(so);
 328        if (ret > 0) {
 329            tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
 330            tp->t_force = 1;
 331            tcp_output(tp);
 332            tp->t_force = 0;
 333        }
 334
 335        return ret;
 336}
 337
 338/*
 339 * Send urgent data
 340 * There's a lot duplicated code here, but...
 341 */
 342int
 343sosendoob(struct socket *so)
 344{
 345        struct sbuf *sb = &so->so_rcv;
 346        char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
 347
 348        int n;
 349
 350        DEBUG_CALL("sosendoob");
 351        DEBUG_ARG("so = %p", so);
 352        DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc);
 353
 354        if (so->so_urgc > 2048)
 355           so->so_urgc = 2048; /* XXXX */
 356
 357        if (sb->sb_rptr < sb->sb_wptr) {
 358                /* We can send it directly */
 359                n = slirp_send(so, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
 360        } else {
 361                /*
 362                 * Since there's no sendv or sendtov like writev,
 363                 * we must copy all data to a linear buffer then
 364                 * send it all
 365                 */
 366                uint32_t urgc = so->so_urgc;
 367                int len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
 368                if (len > urgc) {
 369                        len = urgc;
 370                }
 371                memcpy(buff, sb->sb_rptr, len);
 372                urgc -= len;
 373                if (urgc) {
 374                        n = sb->sb_wptr - sb->sb_data;
 375                        if (n > urgc) {
 376                                n = urgc;
 377                        }
 378                        memcpy((buff + len), sb->sb_data, n);
 379                        len += n;
 380                }
 381                n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
 382#ifdef DEBUG
 383                if (n != len) {
 384                        DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
 385                }
 386#endif
 387        }
 388
 389        if (n < 0) {
 390                return n;
 391        }
 392        so->so_urgc -= n;
 393        DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n", n, so->so_urgc));
 394
 395        sb->sb_cc -= n;
 396        sb->sb_rptr += n;
 397        if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
 398                sb->sb_rptr -= sb->sb_datalen;
 399
 400        return n;
 401}
 402
 403/*
 404 * Write data from so_rcv to so's socket,
 405 * updating all sbuf field as necessary
 406 */
 407int
 408sowrite(struct socket *so)
 409{
 410        int  n,nn;
 411        struct sbuf *sb = &so->so_rcv;
 412        int len = sb->sb_cc;
 413        struct iovec iov[2];
 414
 415        DEBUG_CALL("sowrite");
 416        DEBUG_ARG("so = %p", so);
 417
 418        if (so->so_urgc) {
 419                uint32_t expected = so->so_urgc;
 420                if (sosendoob(so) < expected) {
 421                        /* Treat a short write as a fatal error too,
 422                         * rather than continuing on and sending the urgent
 423                         * data as if it were non-urgent and leaving the
 424                         * so_urgc count wrong.
 425                         */
 426                        goto err_disconnected;
 427                }
 428                if (sb->sb_cc == 0)
 429                        return 0;
 430        }
 431
 432        /*
 433         * No need to check if there's something to write,
 434         * sowrite wouldn't have been called otherwise
 435         */
 436
 437        iov[0].iov_base = sb->sb_rptr;
 438        iov[1].iov_base = NULL;
 439        iov[1].iov_len = 0;
 440        if (sb->sb_rptr < sb->sb_wptr) {
 441                iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
 442                /* Should never succeed, but... */
 443                if (iov[0].iov_len > len) iov[0].iov_len = len;
 444                n = 1;
 445        } else {
 446                iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
 447                if (iov[0].iov_len > len) iov[0].iov_len = len;
 448                len -= iov[0].iov_len;
 449                if (len) {
 450                        iov[1].iov_base = sb->sb_data;
 451                        iov[1].iov_len = sb->sb_wptr - sb->sb_data;
 452                        if (iov[1].iov_len > len) iov[1].iov_len = len;
 453                        n = 2;
 454                } else
 455                        n = 1;
 456        }
 457        /* Check if there's urgent data to send, and if so, send it */
 458
 459#ifdef HAVE_READV
 460        nn = writev(so->s, (const struct iovec *)iov, n);
 461
 462        DEBUG_MISC((dfd, "  ... wrote nn = %d bytes\n", nn));
 463#else
 464        nn = slirp_send(so, iov[0].iov_base, iov[0].iov_len,0);
 465#endif
 466        /* This should never happen, but people tell me it does *shrug* */
 467        if (nn < 0 && (errno == EAGAIN || errno == EINTR))
 468                return 0;
 469
 470        if (nn <= 0) {
 471                goto err_disconnected;
 472        }
 473
 474#ifndef HAVE_READV
 475        if (n == 2 && nn == iov[0].iov_len) {
 476            int ret;
 477            ret = slirp_send(so, iov[1].iov_base, iov[1].iov_len,0);
 478            if (ret > 0)
 479                nn += ret;
 480        }
 481        DEBUG_MISC((dfd, "  ... wrote nn = %d bytes\n", nn));
 482#endif
 483
 484        /* Update sbuf */
 485        sb->sb_cc -= nn;
 486        sb->sb_rptr += nn;
 487        if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
 488                sb->sb_rptr -= sb->sb_datalen;
 489
 490        /*
 491         * If in DRAIN mode, and there's no more data, set
 492         * it CANTSENDMORE
 493         */
 494        if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
 495                sofcantsendmore(so);
 496
 497        return nn;
 498
 499err_disconnected:
 500        DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
 501                    so->so_state, errno));
 502        sofcantsendmore(so);
 503        tcp_sockclosed(sototcpcb(so));
 504        return -1;
 505}
 506
 507/*
 508 * recvfrom() a UDP socket
 509 */
 510void
 511sorecvfrom(struct socket *so)
 512{
 513        struct sockaddr_storage addr;
 514        struct sockaddr_storage saddr, daddr;
 515        socklen_t addrlen = sizeof(struct sockaddr_storage);
 516
 517        DEBUG_CALL("sorecvfrom");
 518        DEBUG_ARG("so = %p", so);
 519
 520        if (so->so_type == IPPROTO_ICMP) {   /* This is a "ping" reply */
 521          char buff[256];
 522          int len;
 523
 524          len = recvfrom(so->s, buff, 256, 0,
 525                         (struct sockaddr *)&addr, &addrlen);
 526          /* XXX Check if reply is "correct"? */
 527
 528          if(len == -1 || len == 0) {
 529            u_char code=ICMP_UNREACH_PORT;
 530
 531            if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST;
 532            else if(errno == ENETUNREACH) code=ICMP_UNREACH_NET;
 533
 534            DEBUG_MISC((dfd," udp icmp rx errno = %d-%s\n",
 535                        errno,strerror(errno)));
 536            icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
 537          } else {
 538            icmp_reflect(so->so_m);
 539            so->so_m = NULL; /* Don't m_free() it again! */
 540          }
 541          /* No need for this socket anymore, udp_detach it */
 542          udp_detach(so);
 543        } else {                                /* A "normal" UDP packet */
 544          struct mbuf *m;
 545          int len;
 546#ifdef _WIN32
 547          unsigned long n;
 548#else
 549          int n;
 550#endif
 551
 552          m = m_get(so->slirp);
 553          if (!m) {
 554              return;
 555          }
 556          switch (so->so_ffamily) {
 557          case AF_INET:
 558              m->m_data += IF_MAXLINKHDR + sizeof(struct udpiphdr);
 559              break;
 560          case AF_INET6:
 561              m->m_data += IF_MAXLINKHDR + sizeof(struct ip6)
 562                                         + sizeof(struct udphdr);
 563              break;
 564          default:
 565              g_assert_not_reached();
 566              break;
 567          }
 568
 569          /*
 570           * XXX Shouldn't FIONREAD packets destined for port 53,
 571           * but I don't know the max packet size for DNS lookups
 572           */
 573          len = M_FREEROOM(m);
 574          /* if (so->so_fport != htons(53)) { */
 575          ioctlsocket(so->s, FIONREAD, &n);
 576
 577          if (n > len) {
 578            n = (m->m_data - m->m_dat) + m->m_len + n + 1;
 579            m_inc(m, n);
 580            len = M_FREEROOM(m);
 581          }
 582          /* } */
 583
 584          m->m_len = recvfrom(so->s, m->m_data, len, 0,
 585                              (struct sockaddr *)&addr, &addrlen);
 586          DEBUG_MISC((dfd, " did recvfrom %d, errno = %d-%s\n",
 587                      m->m_len, errno,strerror(errno)));
 588          if(m->m_len<0) {
 589            /* Report error as ICMP */
 590            switch (so->so_lfamily) {
 591            uint8_t code;
 592            case AF_INET:
 593              code = ICMP_UNREACH_PORT;
 594
 595              if (errno == EHOSTUNREACH) {
 596                code = ICMP_UNREACH_HOST;
 597              } else if (errno == ENETUNREACH) {
 598                code = ICMP_UNREACH_NET;
 599              }
 600
 601              DEBUG_MISC((dfd, " rx error, tx icmp ICMP_UNREACH:%i\n", code));
 602              icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
 603              break;
 604            case AF_INET6:
 605              code = ICMP6_UNREACH_PORT;
 606
 607              if (errno == EHOSTUNREACH) {
 608                code = ICMP6_UNREACH_ADDRESS;
 609              } else if (errno == ENETUNREACH) {
 610                code = ICMP6_UNREACH_NO_ROUTE;
 611              }
 612
 613              DEBUG_MISC((dfd, " rx error, tx icmp6 ICMP_UNREACH:%i\n", code));
 614              icmp6_send_error(so->so_m, ICMP6_UNREACH, code);
 615              break;
 616            default:
 617              g_assert_not_reached();
 618              break;
 619            }
 620            m_free(m);
 621          } else {
 622          /*
 623           * Hack: domain name lookup will be used the most for UDP,
 624           * and since they'll only be used once there's no need
 625           * for the 4 minute (or whatever) timeout... So we time them
 626           * out much quicker (10 seconds  for now...)
 627           */
 628            if (so->so_expire) {
 629              if (so->so_fport == htons(53))
 630                so->so_expire = curtime + SO_EXPIREFAST;
 631              else
 632                so->so_expire = curtime + SO_EXPIRE;
 633            }
 634
 635            /*
 636             * If this packet was destined for CTL_ADDR,
 637             * make it look like that's where it came from
 638             */
 639            saddr = addr;
 640            sotranslate_in(so, &saddr);
 641            daddr = so->lhost.ss;
 642
 643            switch (so->so_ffamily) {
 644            case AF_INET:
 645                udp_output(so, m, (struct sockaddr_in *) &saddr,
 646                           (struct sockaddr_in *) &daddr,
 647                           so->so_iptos);
 648                break;
 649            case AF_INET6:
 650                udp6_output(so, m, (struct sockaddr_in6 *) &saddr,
 651                            (struct sockaddr_in6 *) &daddr);
 652                break;
 653            default:
 654                g_assert_not_reached();
 655                break;
 656            }
 657          } /* rx error */
 658        } /* if ping packet */
 659}
 660
 661/*
 662 * sendto() a socket
 663 */
 664int
 665sosendto(struct socket *so, struct mbuf *m)
 666{
 667        int ret;
 668        struct sockaddr_storage addr;
 669
 670        DEBUG_CALL("sosendto");
 671        DEBUG_ARG("so = %p", so);
 672        DEBUG_ARG("m = %p", m);
 673
 674        addr = so->fhost.ss;
 675        DEBUG_CALL(" sendto()ing)");
 676        sotranslate_out(so, &addr);
 677
 678        /* Don't care what port we get */
 679        ret = sendto(so->s, m->m_data, m->m_len, 0,
 680                     (struct sockaddr *)&addr, sockaddr_size(&addr));
 681        if (ret < 0)
 682                return -1;
 683
 684        /*
 685         * Kill the socket if there's no reply in 4 minutes,
 686         * but only if it's an expirable socket
 687         */
 688        if (so->so_expire)
 689                so->so_expire = curtime + SO_EXPIRE;
 690        so->so_state &= SS_PERSISTENT_MASK;
 691        so->so_state |= SS_ISFCONNECTED; /* So that it gets select()ed */
 692        return 0;
 693}
 694
 695/*
 696 * Listen for incoming TCP connections
 697 */
 698struct socket *
 699tcp_listen(Slirp *slirp, uint32_t haddr, u_int hport, uint32_t laddr,
 700           u_int lport, int flags)
 701{
 702        struct sockaddr_in addr;
 703        struct socket *so;
 704        int s, opt = 1;
 705        socklen_t addrlen = sizeof(addr);
 706        memset(&addr, 0, addrlen);
 707
 708        DEBUG_CALL("tcp_listen");
 709        DEBUG_ARG("haddr = %s", inet_ntoa((struct in_addr){.s_addr = haddr}));
 710        DEBUG_ARG("hport = %d", ntohs(hport));
 711        DEBUG_ARG("laddr = %s", inet_ntoa((struct in_addr){.s_addr = laddr}));
 712        DEBUG_ARG("lport = %d", ntohs(lport));
 713        DEBUG_ARG("flags = %x", flags);
 714
 715        so = socreate(slirp);
 716
 717        /* Don't tcp_attach... we don't need so_snd nor so_rcv */
 718        if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL) {
 719            g_free(so);
 720            return NULL;
 721        }
 722        insque(so, &slirp->tcb);
 723
 724        /*
 725         * SS_FACCEPTONCE sockets must time out.
 726         */
 727        if (flags & SS_FACCEPTONCE)
 728           so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
 729
 730        so->so_state &= SS_PERSISTENT_MASK;
 731        so->so_state |= (SS_FACCEPTCONN | flags);
 732        so->so_lfamily = AF_INET;
 733        so->so_lport = lport; /* Kept in network format */
 734        so->so_laddr.s_addr = laddr; /* Ditto */
 735
 736        addr.sin_family = AF_INET;
 737        addr.sin_addr.s_addr = haddr;
 738        addr.sin_port = hport;
 739
 740        if (((s = qemu_socket(AF_INET,SOCK_STREAM,0)) < 0) ||
 741            (socket_set_fast_reuse(s) < 0) ||
 742            (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0) ||
 743            (listen(s,1) < 0)) {
 744                int tmperrno = errno; /* Don't clobber the real reason we failed */
 745
 746                if (s >= 0) {
 747                    closesocket(s);
 748                }
 749                sofree(so);
 750                /* Restore the real errno */
 751#ifdef _WIN32
 752                WSASetLastError(tmperrno);
 753#else
 754                errno = tmperrno;
 755#endif
 756                return NULL;
 757        }
 758        qemu_setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int));
 759        opt = 1;
 760        qemu_setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(int));
 761
 762        getsockname(s,(struct sockaddr *)&addr,&addrlen);
 763        so->so_ffamily = AF_INET;
 764        so->so_fport = addr.sin_port;
 765        if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
 766           so->so_faddr = slirp->vhost_addr;
 767        else
 768           so->so_faddr = addr.sin_addr;
 769
 770        so->s = s;
 771        return so;
 772}
 773
 /*
  * Various session state calls
  * XXX Should be #define's
  * The socket state stuff needs work; these often get called 2 or 3
  * times each when only 1 call was needed
  */
 780void
 781soisfconnecting(struct socket *so)
 782{
 783        so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
 784                          SS_FCANTSENDMORE|SS_FWDRAIN);
 785        so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
 786}
 787
 788void
 789soisfconnected(struct socket *so)
 790{
 791        so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
 792        so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
 793}
 794
 795static void
 796sofcantrcvmore(struct socket *so)
 797{
 798        if ((so->so_state & SS_NOFDREF) == 0) {
 799                shutdown(so->s,0);
 800        }
 801        so->so_state &= ~(SS_ISFCONNECTING);
 802        if (so->so_state & SS_FCANTSENDMORE) {
 803           so->so_state &= SS_PERSISTENT_MASK;
 804           so->so_state |= SS_NOFDREF; /* Don't select it */
 805        } else {
 806           so->so_state |= SS_FCANTRCVMORE;
 807        }
 808}
 809
 810static void
 811sofcantsendmore(struct socket *so)
 812{
 813        if ((so->so_state & SS_NOFDREF) == 0) {
 814            shutdown(so->s,1);           /* send FIN to fhost */
 815        }
 816        so->so_state &= ~(SS_ISFCONNECTING);
 817        if (so->so_state & SS_FCANTRCVMORE) {
 818           so->so_state &= SS_PERSISTENT_MASK;
 819           so->so_state |= SS_NOFDREF; /* as above */
 820        } else {
 821           so->so_state |= SS_FCANTSENDMORE;
 822        }
 823}
 824
 825/*
 826 * Set write drain mode
 827 * Set CANTSENDMORE once all data has been write()n
 828 */
 829void
 830sofwdrain(struct socket *so)
 831{
 832        if (so->so_rcv.sb_cc)
 833                so->so_state |= SS_FWDRAIN;
 834        else
 835                sofcantsendmore(so);
 836}
 837
 838/*
 839 * Translate addr in host addr when it is a virtual address
 840 */
 841void sotranslate_out(struct socket *so, struct sockaddr_storage *addr)
 842{
 843    Slirp *slirp = so->slirp;
 844    struct sockaddr_in *sin = (struct sockaddr_in *)addr;
 845    struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
 846
 847    switch (addr->ss_family) {
 848    case AF_INET:
 849        if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) ==
 850                slirp->vnetwork_addr.s_addr) {
 851            /* It's an alias */
 852            if (so->so_faddr.s_addr == slirp->vnameserver_addr.s_addr) {
 853                if (get_dns_addr(&sin->sin_addr) < 0) {
 854                    sin->sin_addr = loopback_addr;
 855                }
 856            } else {
 857                sin->sin_addr = loopback_addr;
 858            }
 859        }
 860
 861        DEBUG_MISC((dfd, " addr.sin_port=%d, "
 862            "addr.sin_addr.s_addr=%.16s\n",
 863            ntohs(sin->sin_port), inet_ntoa(sin->sin_addr)));
 864        break;
 865
 866    case AF_INET6:
 867        if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6,
 868                    slirp->vprefix_len)) {
 869            if (in6_equal(&so->so_faddr6, &slirp->vnameserver_addr6)) {
 870                uint32_t scope_id;
 871                if (get_dns6_addr(&sin6->sin6_addr, &scope_id) >= 0) {
 872                    sin6->sin6_scope_id = scope_id;
 873                } else {
 874                    sin6->sin6_addr = in6addr_loopback;
 875                }
 876            } else {
 877                sin6->sin6_addr = in6addr_loopback;
 878            }
 879        }
 880        break;
 881
 882    default:
 883        break;
 884    }
 885}
 886
 887void sotranslate_in(struct socket *so, struct sockaddr_storage *addr)
 888{
 889    Slirp *slirp = so->slirp;
 890    struct sockaddr_in *sin = (struct sockaddr_in *)addr;
 891    struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
 892
 893    switch (addr->ss_family) {
 894    case AF_INET:
 895        if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) ==
 896            slirp->vnetwork_addr.s_addr) {
 897            uint32_t inv_mask = ~slirp->vnetwork_mask.s_addr;
 898
 899            if ((so->so_faddr.s_addr & inv_mask) == inv_mask) {
 900                sin->sin_addr = slirp->vhost_addr;
 901            } else if (sin->sin_addr.s_addr == loopback_addr.s_addr ||
 902                       so->so_faddr.s_addr != slirp->vhost_addr.s_addr) {
 903                sin->sin_addr = so->so_faddr;
 904            }
 905        }
 906        break;
 907
 908    case AF_INET6:
 909        if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6,
 910                    slirp->vprefix_len)) {
 911            if (in6_equal(&sin6->sin6_addr, &in6addr_loopback)
 912                    || !in6_equal(&so->so_faddr6, &slirp->vhost_addr6)) {
 913                sin6->sin6_addr = so->so_faddr6;
 914            }
 915        }
 916        break;
 917
 918    default:
 919        break;
 920    }
 921}
 922
 923/*
 924 * Translate connections from localhost to the real hostname
 925 */
 926void sotranslate_accept(struct socket *so)
 927{
 928    Slirp *slirp = so->slirp;
 929
 930    switch (so->so_ffamily) {
 931    case AF_INET:
 932        if (so->so_faddr.s_addr == INADDR_ANY ||
 933            (so->so_faddr.s_addr & loopback_mask) ==
 934            (loopback_addr.s_addr & loopback_mask)) {
 935           so->so_faddr = slirp->vhost_addr;
 936        }
 937        break;
 938
 939   case AF_INET6:
 940        if (in6_equal(&so->so_faddr6, &in6addr_any) ||
 941                in6_equal(&so->so_faddr6, &in6addr_loopback)) {
 942           so->so_faddr6 = slirp->vhost_addr6;
 943        }
 944        break;
 945
 946    default:
 947        break;
 948    }
 949}
 950