busybox/networking/wget.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * wget - retrieve a file using HTTP or FTP
   4 *
   5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
   6 *
   7 * Licensed under GPLv2, see file LICENSE in this tarball for details.
   8 */
   9#include "libbb.h"
  10
  11struct host_info {
  12        // May be used if we ever will want to free() all xstrdup()s...
  13        /* char *allocated; */
  14        const char *path;
  15        const char *user;
  16        char       *host;
  17        int         port;
  18        smallint    is_ftp;
  19};
  20
  21
  22/* Globals (can be accessed from signal handlers) */
  23struct globals {
  24        off_t content_len;        /* Content-length of the file */
  25        off_t beg_range;          /* Range at which continue begins */
  26#if ENABLE_FEATURE_WGET_STATUSBAR
  27        off_t transferred;        /* Number of bytes transferred so far */
  28        const char *curfile;      /* Name of current file being transferred */
  29        bb_progress_t pmt;
  30#endif
  31        smallint chunked;         /* chunked transfer encoding */
  32        smallint got_clen;        /* got content-length: from server  */
  33} FIX_ALIASING;
  34#define G (*(struct globals*)&bb_common_bufsiz1)
  35struct BUG_G_too_big {
  36        char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
  37};
  38#define INIT_G() do { } while (0)
  39
  40
  41#if ENABLE_FEATURE_WGET_STATUSBAR
  42
  43static void progress_meter(int flag)
  44{
  45        /* We can be called from signal handler */
  46        int save_errno = errno;
  47
  48        if (flag == -1) { /* first call to progress_meter */
  49                bb_progress_init(&G.pmt);
  50        }
  51
  52        bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred,
  53                           G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
  54
  55        if (flag == 0) {
  56                /* last call to progress_meter */
  57                alarm(0);
  58                bb_putchar_stderr('\n');
  59                G.transferred = 0;
  60        } else {
  61                if (flag == -1) { /* first call to progress_meter */
  62                        signal_SA_RESTART_empty_mask(SIGALRM, progress_meter);
  63                }
  64                alarm(1);
  65        }
  66
  67        errno = save_errno;
  68}
  69
  70#else /* FEATURE_WGET_STATUSBAR */
  71
  72static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
  73
  74#endif
  75
  76
  77/* IPv6 knows scoped address types i.e. link and site local addresses. Link
  78 * local addresses can have a scope identifier to specify the
  79 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
  80 * identifier is only valid on a single node.
  81 *
  82 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
  83 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
  84 * in the Host header as invalid requests, see
  85 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
  86 */
  87static void strip_ipv6_scope_id(char *host)
  88{
  89        char *scope, *cp;
  90
  91        /* bbox wget actually handles IPv6 addresses without [], like
  92         * wget "http://::1/xxx", but this is not standard.
  93         * To save code, _here_ we do not support it. */
  94
  95        if (host[0] != '[')
  96                return; /* not IPv6 */
  97
  98        scope = strchr(host, '%');
  99        if (!scope)
 100                return;
 101
 102        /* Remove the IPv6 zone identifier from the host address */
 103        cp = strchr(host, ']');
 104        if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
 105                /* malformed address (not "[xx]:nn" or "[xx]") */
 106                return;
 107        }
 108
 109        /* cp points to "]...", scope points to "%eth0]..." */
 110        overlapping_strcpy(scope, cp);
 111}
 112
 113/* Read NMEMB bytes into PTR from STREAM.  Returns the number of bytes read,
 114 * and a short count if an eof or non-interrupt error is encountered.  */
 115static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
 116{
 117        size_t ret;
 118        char *p = (char*)ptr;
 119
 120        do {
 121                clearerr(stream);
 122                errno = 0;
 123                ret = fread(p, 1, nmemb, stream);
 124                p += ret;
 125                nmemb -= ret;
 126        } while (nmemb && ferror(stream) && errno == EINTR);
 127
 128        return p - (char*)ptr;
 129}
 130
 131/* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM.
 132 * Returns S, or NULL if an eof or non-interrupt error is encountered.  */
 133static char *safe_fgets(char *s, int size, FILE *stream)
 134{
 135        char *ret;
 136
 137        do {
 138                clearerr(stream);
 139                errno = 0;
 140                ret = fgets(s, size, stream);
 141        } while (ret == NULL && ferror(stream) && errno == EINTR);
 142
 143        return ret;
 144}
 145
 146#if ENABLE_FEATURE_WGET_AUTHENTICATION
 147/* Base64-encode character string. buf is assumed to be char buf[512]. */
 148static char *base64enc_512(char buf[512], const char *str)
 149{
 150        unsigned len = strlen(str);
 151        if (len > 512/4*3 - 10) /* paranoia */
 152                len = 512/4*3 - 10;
 153        bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);
 154        return buf;
 155}
 156#endif
 157
 158static char* sanitize_string(char *s)
 159{
 160        unsigned char *p = (void *) s;
 161        while (*p >= ' ')
 162                p++;
 163        *p = '\0';
 164        return s;
 165}
 166
 167static FILE *open_socket(len_and_sockaddr *lsa)
 168{
 169        FILE *fp;
 170
 171        /* glibc 2.4 seems to try seeking on it - ??! */
 172        /* hopefully it understands what ESPIPE means... */
 173        fp = fdopen(xconnect_stream(lsa), "r+");
 174        if (fp == NULL)
 175                bb_perror_msg_and_die("fdopen");
 176
 177        return fp;
 178}
 179
 180static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
 181{
 182        int result;
 183        if (s1) {
 184                if (!s2) s2 = "";
 185                fprintf(fp, "%s%s\r\n", s1, s2);
 186                fflush(fp);
 187        }
 188
 189        do {
 190                char *buf_ptr;
 191
 192                if (fgets(buf, 510, fp) == NULL) {
 193                        bb_perror_msg_and_die("error getting response");
 194                }
 195                buf_ptr = strstr(buf, "\r\n");
 196                if (buf_ptr) {
 197                        *buf_ptr = '\0';
 198                }
 199        } while (!isdigit(buf[0]) || buf[3] != ' ');
 200
 201        buf[3] = '\0';
 202        result = xatoi_u(buf);
 203        buf[3] = ' ';
 204        return result;
 205}
 206
 207static void parse_url(char *src_url, struct host_info *h)
 208{
 209        char *url, *p, *sp;
 210
 211        /* h->allocated = */ url = xstrdup(src_url);
 212
 213        if (strncmp(url, "http://", 7) == 0) {
 214                h->port = bb_lookup_port("http", "tcp", 80);
 215                h->host = url + 7;
 216                h->is_ftp = 0;
 217        } else if (strncmp(url, "ftp://", 6) == 0) {
 218                h->port = bb_lookup_port("ftp", "tcp", 21);
 219                h->host = url + 6;
 220                h->is_ftp = 1;
 221        } else
 222                bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
 223
 224        // FYI:
 225        // "Real" wget 'http://busybox.net?var=a/b' sends this request:
 226        //   'GET /?var=a/b HTTP 1.0'
 227        //   and saves 'index.html?var=a%2Fb' (we save 'b')
 228        // wget 'http://busybox.net?login=john@doe':
 229        //   request: 'GET /?login=john@doe HTTP/1.0'
 230        //   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
 231        // wget 'http://busybox.net#test/test':
 232        //   request: 'GET / HTTP/1.0'
 233        //   saves: 'index.html' (we save 'test')
 234        //
 235        // We also don't add unique .N suffix if file exists...
 236        sp = strchr(h->host, '/');
 237        p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
 238        p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
 239        if (!sp) {
 240                h->path = "";
 241        } else if (*sp == '/') {
 242                *sp = '\0';
 243                h->path = sp + 1;
 244        } else { // '#' or '?'
 245                // http://busybox.net?login=john@doe is a valid URL
 246                // memmove converts to:
 247                // http:/busybox.nett?login=john@doe...
 248                memmove(h->host - 1, h->host, sp - h->host);
 249                h->host--;
 250                sp[-1] = '\0';
 251                h->path = sp;
 252        }
 253
 254        // We used to set h->user to NULL here, but this interferes
 255        // with handling of code 302 ("object was moved")
 256
 257        sp = strrchr(h->host, '@');
 258        if (sp != NULL) {
 259                h->user = h->host;
 260                *sp = '\0';
 261                h->host = sp + 1;
 262        }
 263
 264        sp = h->host;
 265}
 266
 267static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
 268{
 269        char *s, *hdrval;
 270        int c;
 271
 272        /* *istrunc = 0; */
 273
 274        /* retrieve header line */
 275        if (fgets(buf, bufsiz, fp) == NULL)
 276                return NULL;
 277
 278        /* see if we are at the end of the headers */
 279        for (s = buf; *s == '\r'; ++s)
 280                continue;
 281        if (*s == '\n')
 282                return NULL;
 283
 284        /* convert the header name to lower case */
 285        for (s = buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
 286                /* tolower for "A-Z", no-op for "0-9a-z-." */
 287                *s = (*s | 0x20);
 288        }
 289
 290        /* verify we are at the end of the header name */
 291        if (*s != ':')
 292                bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));
 293
 294        /* locate the start of the header value */
 295        *s++ = '\0';
 296        hdrval = skip_whitespace(s);
 297
 298        /* locate the end of header */
 299        while (*s && *s != '\r' && *s != '\n')
 300                ++s;
 301
 302        /* end of header found */
 303        if (*s) {
 304                *s = '\0';
 305                return hdrval;
 306        }
 307
 308        /* Rats! The buffer isn't big enough to hold the entire header value */
 309        while (c = getc(fp), c != EOF && c != '\n')
 310                continue;
 311        /* *istrunc = 1; */
 312        return hdrval;
 313}
 314
 315#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 316static char *URL_escape(const char *str)
 317{
 318        /* URL encode, see RFC 2396 */
 319        char *dst;
 320        char *res = dst = xmalloc(strlen(str) * 3 + 1);
 321        unsigned char c;
 322
 323        while (1) {
 324                c = *str++;
 325                if (c == '\0'
 326                /* || strchr("!&'()*-.=_~", c) - more code */
 327                 || c == '!'
 328                 || c == '&'
 329                 || c == '\''
 330                 || c == '('
 331                 || c == ')'
 332                 || c == '*'
 333                 || c == '-'
 334                 || c == '.'
 335                 || c == '='
 336                 || c == '_'
 337                 || c == '~'
 338                 || (c >= '0' && c <= '9')
 339                 || ((c|0x20) >= 'a' && (c|0x20) <= 'z')
 340                ) {
 341                        *dst++ = c;
 342                        if (c == '\0')
 343                                return res;
 344                } else {
 345                        *dst++ = '%';
 346                        *dst++ = bb_hexdigits_upcase[c >> 4];
 347                        *dst++ = bb_hexdigits_upcase[c & 0xf];
 348                }
 349        }
 350}
 351#endif
 352
 353static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
 354{
 355        char buf[512];
 356        FILE *sfp;
 357        char *str;
 358        int port;
 359
 360        if (!target->user)
 361                target->user = xstrdup("anonymous:busybox@");
 362
 363        sfp = open_socket(lsa);
 364        if (ftpcmd(NULL, NULL, sfp, buf) != 220)
 365                bb_error_msg_and_die("%s", sanitize_string(buf+4));
 366
 367        /*
 368         * Splitting username:password pair,
 369         * trying to log in
 370         */
 371        str = strchr(target->user, ':');
 372        if (str)
 373                *str++ = '\0';
 374        switch (ftpcmd("USER ", target->user, sfp, buf)) {
 375        case 230:
 376                break;
 377        case 331:
 378                if (ftpcmd("PASS ", str, sfp, buf) == 230)
 379                        break;
 380                /* fall through (failed login) */
 381        default:
 382                bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
 383        }
 384
 385        ftpcmd("TYPE I", NULL, sfp, buf);
 386
 387        /*
 388         * Querying file size
 389         */
 390        if (ftpcmd("SIZE ", target->path, sfp, buf) == 213) {
 391                G.content_len = BB_STRTOOFF(buf+4, NULL, 10);
 392                if (G.content_len < 0 || errno) {
 393                        bb_error_msg_and_die("SIZE value is garbage");
 394                }
 395                G.got_clen = 1;
 396        }
 397
 398        /*
 399         * Entering passive mode
 400         */
 401        if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
 402 pasv_error:
 403                bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
 404        }
 405        // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
 406        // Server's IP is N1.N2.N3.N4 (we ignore it)
 407        // Server's port for data connection is P1*256+P2
 408        str = strrchr(buf, ')');
 409        if (str) str[0] = '\0';
 410        str = strrchr(buf, ',');
 411        if (!str) goto pasv_error;
 412        port = xatou_range(str+1, 0, 255);
 413        *str = '\0';
 414        str = strrchr(buf, ',');
 415        if (!str) goto pasv_error;
 416        port += xatou_range(str+1, 0, 255) * 256;
 417        set_nport(lsa, htons(port));
 418
 419        *dfpp = open_socket(lsa);
 420
 421        if (G.beg_range) {
 422                sprintf(buf, "REST %"OFF_FMT"u", G.beg_range);
 423                if (ftpcmd(buf, NULL, sfp, buf) == 350)
 424                        G.content_len -= G.beg_range;
 425        }
 426
 427        if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
 428                bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
 429
 430        return sfp;
 431}
 432
 433/* Must match option string! */
 434enum {
 435        WGET_OPT_CONTINUE   = (1 << 0),
 436        WGET_OPT_SPIDER     = (1 << 1),
 437        WGET_OPT_QUIET      = (1 << 2),
 438        WGET_OPT_OUTNAME    = (1 << 3),
 439        WGET_OPT_PREFIX     = (1 << 4),
 440        WGET_OPT_PROXY      = (1 << 5),
 441        WGET_OPT_USER_AGENT = (1 << 6),
 442        WGET_OPT_RETRIES    = (1 << 7),
 443        WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 8),
 444        WGET_OPT_PASSIVE    = (1 << 9),
 445        WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
 446        WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
 447};
 448
 449static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
 450{
 451        char buf[512];
 452
 453        if (!(option_mask32 & WGET_OPT_QUIET))
 454                progress_meter(-1);
 455
 456        if (G.chunked)
 457                goto get_clen;
 458
 459        /* Loops only if chunked */
 460        while (1) {
 461                while (1) {
 462                        int n;
 463                        unsigned rdsz;
 464
 465                        rdsz = sizeof(buf);
 466                        if (G.got_clen) {
 467                                if (G.content_len < (off_t)sizeof(buf)) {
 468                                        if ((int)G.content_len <= 0)
 469                                                break;
 470                                        rdsz = (unsigned)G.content_len;
 471                                }
 472                        }
 473                        n = safe_fread(buf, rdsz, dfp);
 474                        if (n <= 0) {
 475                                if (ferror(dfp)) {
 476                                        /* perror will not work: ferror doesn't set errno */
 477                                        bb_error_msg_and_die(bb_msg_read_error);
 478                                }
 479                                break;
 480                        }
 481                        xwrite(output_fd, buf, n);
 482#if ENABLE_FEATURE_WGET_STATUSBAR
 483                        G.transferred += n;
 484#endif
 485                        if (G.got_clen)
 486                                G.content_len -= n;
 487                }
 488
 489                if (!G.chunked)
 490                        break;
 491
 492                safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
 493 get_clen:
 494                safe_fgets(buf, sizeof(buf), dfp);
 495                G.content_len = STRTOOFF(buf, NULL, 16);
 496                /* FIXME: error check? */
 497                if (G.content_len == 0)
 498                        break; /* all done! */
 499                G.got_clen = 1;
 500        }
 501
 502        if (!(option_mask32 & WGET_OPT_QUIET))
 503                progress_meter(0);
 504}
 505
 506int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
 507int wget_main(int argc UNUSED_PARAM, char **argv)
 508{
 509        char buf[512];
 510        struct host_info server, target;
 511        len_and_sockaddr *lsa;
 512        unsigned opt;
 513        int redir_limit;
 514        char *proxy = NULL;
 515        char *dir_prefix = NULL;
 516#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 517        char *post_data;
 518        char *extra_headers = NULL;
 519        llist_t *headers_llist = NULL;
 520#endif
 521        FILE *sfp;                      /* socket to web/ftp server         */
 522        FILE *dfp;                      /* socket to ftp server (data)      */
 523        char *fname_out;                /* where to direct output (-O)      */
 524        int output_fd = -1;
 525        bool use_proxy;                 /* Use proxies if env vars are set  */
 526        const char *proxy_flag = "on";  /* Use proxies if env vars are set  */
 527        const char *user_agent = "Wget";/* "User-Agent" header field        */
 528
 529        static const char keywords[] ALIGN1 =
 530                "content-length\0""transfer-encoding\0""chunked\0""location\0";
 531        enum {
 532                KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
 533        };
 534#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 535        static const char wget_longopts[] ALIGN1 =
 536                /* name, has_arg, val */
 537                "continue\0"         No_argument       "c"
 538                "spider\0"           No_argument       "s"
 539                "quiet\0"            No_argument       "q"
 540                "output-document\0"  Required_argument "O"
 541                "directory-prefix\0" Required_argument "P"
 542                "proxy\0"            Required_argument "Y"
 543                "user-agent\0"       Required_argument "U"
 544                /* Ignored: */
 545                // "tries\0"            Required_argument "t"
 546                // "timeout\0"          Required_argument "T"
 547                /* Ignored (we always use PASV): */
 548                "passive-ftp\0"      No_argument       "\xff"
 549                "header\0"           Required_argument "\xfe"
 550                "post-data\0"        Required_argument "\xfd"
 551                /* Ignored (we don't do ssl) */
 552                "no-check-certificate\0" No_argument   "\xfc"
 553                ;
 554#endif
 555
 556        INIT_G();
 557
 558#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 559        applet_long_options = wget_longopts;
 560#endif
 561        /* server.allocated = target.allocated = NULL; */
 562        opt_complementary = "-1" IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
 563        opt = getopt32(argv, "csqO:P:Y:U:" /*ignored:*/ "t:T:",
 564                                &fname_out, &dir_prefix,
 565                                &proxy_flag, &user_agent,
 566                                NULL, /* -t RETRIES */
 567                                NULL /* -T NETWORK_READ_TIMEOUT */
 568                                IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
 569                                IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
 570                                );
 571#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 572        if (headers_llist) {
 573                int size = 1;
 574                char *cp;
 575                llist_t *ll = headers_llist;
 576                while (ll) {
 577                        size += strlen(ll->data) + 2;
 578                        ll = ll->link;
 579                }
 580                extra_headers = cp = xmalloc(size);
 581                while (headers_llist) {
 582                        cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
 583                }
 584        }
 585#endif
 586
 587        /* TODO: compat issue: should handle "wget URL1 URL2..." */
 588
 589        target.user = NULL;
 590        parse_url(argv[optind], &target);
 591
 592        /* Use the proxy if necessary */
 593        use_proxy = (strcmp(proxy_flag, "off") != 0);
 594        if (use_proxy) {
 595                proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
 596                if (proxy && proxy[0]) {
 597                        server.user = NULL;
 598                        parse_url(proxy, &server);
 599                } else {
 600                        use_proxy = 0;
 601                }
 602        }
 603        if (!use_proxy) {
 604                server.port = target.port;
 605                if (ENABLE_FEATURE_IPV6) {
 606                        server.host = xstrdup(target.host);
 607                } else {
 608                        server.host = target.host;
 609                }
 610        }
 611
 612        if (ENABLE_FEATURE_IPV6)
 613                strip_ipv6_scope_id(target.host);
 614
 615        /* Guess an output filename, if there was no -O FILE */
 616        if (!(opt & WGET_OPT_OUTNAME)) {
 617                fname_out = bb_get_last_path_component_nostrip(target.path);
 618                /* handle "wget http://kernel.org//" */
 619                if (fname_out[0] == '/' || !fname_out[0])
 620                        fname_out = (char*)"index.html";
 621                /* -P DIR is considered only if there was no -O FILE */
 622                if (dir_prefix)
 623                        fname_out = concat_path_file(dir_prefix, fname_out);
 624        } else {
 625                if (LONE_DASH(fname_out)) {
 626                        /* -O - */
 627                        output_fd = 1;
 628                        opt &= ~WGET_OPT_CONTINUE;
 629                }
 630        }
 631#if ENABLE_FEATURE_WGET_STATUSBAR
 632        G.curfile = bb_get_last_path_component_nostrip(fname_out);
 633#endif
 634
 635        /* Impossible?
 636        if ((opt & WGET_OPT_CONTINUE) && !fname_out)
 637                bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
 638        */
 639
 640        /* Determine where to start transfer */
 641        if (opt & WGET_OPT_CONTINUE) {
 642                output_fd = open(fname_out, O_WRONLY);
 643                if (output_fd >= 0) {
 644                        G.beg_range = xlseek(output_fd, 0, SEEK_END);
 645                }
 646                /* File doesn't exist. We do not create file here yet.
 647                 * We are not sure it exists on remove side */
 648        }
 649
 650        redir_limit = 5;
 651 resolve_lsa:
 652        lsa = xhost2sockaddr(server.host, server.port);
 653        if (!(opt & WGET_OPT_QUIET)) {
 654                char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
 655                fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
 656                free(s);
 657        }
 658 establish_session:
 659        if (use_proxy || !target.is_ftp) {
 660                /*
 661                 *  HTTP session
 662                 */
 663                char *str;
 664                int status;
 665
 666                /* Open socket to http server */
 667                sfp = open_socket(lsa);
 668
 669                /* Send HTTP request */
 670                if (use_proxy) {
 671                        fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
 672                                target.is_ftp ? "f" : "ht", target.host,
 673                                target.path);
 674                } else {
 675                        if (opt & WGET_OPT_POST_DATA)
 676                                fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
 677                        else
 678                                fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
 679                }
 680
 681                fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
 682                        target.host, user_agent);
 683
 684#if ENABLE_FEATURE_WGET_AUTHENTICATION
 685                if (target.user) {
 686                        fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
 687                                base64enc_512(buf, target.user));
 688                }
 689                if (use_proxy && server.user) {
 690                        fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
 691                                base64enc_512(buf, server.user));
 692                }
 693#endif
 694
 695                if (G.beg_range)
 696                        fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
 697#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 698                if (extra_headers)
 699                        fputs(extra_headers, sfp);
 700
 701                if (opt & WGET_OPT_POST_DATA) {
 702                        char *estr = URL_escape(post_data);
 703                        fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
 704                        fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
 705                                        (int) strlen(estr), estr);
 706                        /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
 707                        /*fprintf(sfp, "%s\r\n", estr);*/
 708                        free(estr);
 709                } else
 710#endif
 711                { /* If "Connection:" is needed, document why */
 712                        fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
 713                }
 714
 715                /*
 716                 * Retrieve HTTP response line and check for "200" status code.
 717                 */
 718 read_response:
 719                if (fgets(buf, sizeof(buf), sfp) == NULL)
 720                        bb_error_msg_and_die("no response from server");
 721
 722                str = buf;
 723                str = skip_non_whitespace(str);
 724                str = skip_whitespace(str);
 725                // FIXME: no error check
 726                // xatou wouldn't work: "200 OK"
 727                status = atoi(str);
 728                switch (status) {
 729                case 0:
 730                case 100:
 731                        while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
 732                                /* eat all remaining headers */;
 733                        goto read_response;
 734                case 200:
 735/*
 736Response 204 doesn't say "null file", it says "metadata
 737has changed but data didn't":
 738
 739"10.2.5 204 No Content
 740The server has fulfilled the request but does not need to return
 741an entity-body, and might want to return updated metainformation.
 742The response MAY include new or updated metainformation in the form
 743of entity-headers, which if present SHOULD be associated with
 744the requested variant.
 745
 746If the client is a user agent, it SHOULD NOT change its document
 747view from that which caused the request to be sent. This response
 748is primarily intended to allow input for actions to take place
 749without causing a change to the user agent's active document view,
 750although any new or updated metainformation SHOULD be applied
 751to the document currently in the user agent's active view.
 752
 753The 204 response MUST NOT include a message-body, and thus
 754is always terminated by the first empty line after the header fields."
 755
 756However, in real world it was observed that some web servers
 757(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
 758*/
 759                case 204:
 760                        break;
 761                case 300:       /* redirection */
 762                case 301:
 763                case 302:
 764                case 303:
 765                        break;
 766                case 206:
 767                        if (G.beg_range)
 768                                break;
 769                        /* fall through */
 770                default:
 771                        bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
 772                }
 773
 774                /*
 775                 * Retrieve HTTP headers.
 776                 */
 777                while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
 778                        /* gethdr converted "FOO:" string to lowercase */
 779                        smalluint key;
 780                        /* strip trailing whitespace */
 781                        char *s = strchrnul(str, '\0') - 1;
 782                        while (s >= str && (*s == ' ' || *s == '\t')) {
 783                                *s = '\0';
 784                                s--;
 785                        }
 786                        key = index_in_strings(keywords, buf) + 1;
 787                        if (key == KEY_content_length) {
 788                                G.content_len = BB_STRTOOFF(str, NULL, 10);
 789                                if (G.content_len < 0 || errno) {
 790                                        bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
 791                                }
 792                                G.got_clen = 1;
 793                                continue;
 794                        }
 795                        if (key == KEY_transfer_encoding) {
 796                                if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
 797                                        bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
 798                                G.chunked = G.got_clen = 1;
 799                        }
 800                        if (key == KEY_location && status >= 300) {
 801                                if (--redir_limit == 0)
 802                                        bb_error_msg_and_die("too many redirections");
 803                                fclose(sfp);
 804                                G.got_clen = 0;
 805                                G.chunked = 0;
 806                                if (str[0] == '/')
 807                                        /* free(target.allocated); */
 808                                        target.path = /* target.allocated = */ xstrdup(str+1);
 809                                        /* lsa stays the same: it's on the same server */
 810                                else {
 811                                        parse_url(str, &target);
 812                                        if (!use_proxy) {
 813                                                server.host = target.host;
 814                                                /* strip_ipv6_scope_id(target.host); - no! */
 815                                                /* we assume remote never gives us IPv6 addr with scope id */
 816                                                server.port = target.port;
 817                                                free(lsa);
 818                                                goto resolve_lsa;
 819                                        } /* else: lsa stays the same: we use proxy */
 820                                }
 821                                goto establish_session;
 822                        }
 823                }
 824//              if (status >= 300)
 825//                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
 826
 827                /* For HTTP, data is pumped over the same connection */
 828                dfp = sfp;
 829
 830        } else {
 831                /*
 832                 *  FTP session
 833                 */
 834                sfp = prepare_ftp_session(&dfp, &target, lsa);
 835        }
 836
 837        if (opt & WGET_OPT_SPIDER) {
 838                if (ENABLE_FEATURE_CLEAN_UP)
 839                        fclose(sfp);
 840                return EXIT_SUCCESS;
 841        }
 842
 843        if (output_fd < 0) {
 844                int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
 845                /* compat with wget: -O FILE can overwrite */
 846                if (opt & WGET_OPT_OUTNAME)
 847                        o_flags = O_WRONLY | O_CREAT | O_TRUNC;
 848                output_fd = xopen(fname_out, o_flags);
 849        }
 850
 851        retrieve_file_data(dfp, output_fd);
 852        xclose(output_fd);
 853
 854        if (dfp != sfp) {
 855                /* It's ftp. Close it properly */
 856                fclose(dfp);
 857                if (ftpcmd(NULL, NULL, sfp, buf) != 226)
 858                        bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
 859                /* ftpcmd("QUIT", NULL, sfp, buf); - why bother? */
 860        }
 861
 862        return EXIT_SUCCESS;
 863}
 864