/* busybox/networking/wget.c */
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * wget - retrieve a file using HTTP or FTP
   4 *
   5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
   6 *
   7 * Licensed under GPLv2, see file LICENSE in this tarball for details.
   8 */
   9#include "libbb.h"
  10
  11struct host_info {
  12        // May be used if we ever will want to free() all xstrdup()s...
  13        /* char *allocated; */
  14        const char *path;
  15        const char *user;
  16        char       *host;
  17        int         port;
  18        smallint    is_ftp;
  19};
  20
  21
  22/* Globals (can be accessed from signal handlers) */
  23struct globals {
  24        off_t content_len;        /* Content-length of the file */
  25        off_t beg_range;          /* Range at which continue begins */
  26#if ENABLE_FEATURE_WGET_STATUSBAR
  27        off_t transferred;        /* Number of bytes transferred so far */
  28        const char *curfile;      /* Name of current file being transferred */
  29        bb_progress_t pmt;
  30#endif
  31        smallint chunked;         /* chunked transfer encoding */
  32        smallint got_clen;        /* got content-length: from server  */
  33};
  34#define G (*(struct globals*)&bb_common_bufsiz1)
  35struct BUG_G_too_big {
  36        char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
  37};
  38#define INIT_G() do { } while (0)
  39
  40
  41#if ENABLE_FEATURE_WGET_STATUSBAR
  42
  43static void progress_meter(int flag)
  44{
  45        /* We can be called from signal handler */
  46        int save_errno = errno;
  47
  48        if (flag == -1) { /* first call to progress_meter */
  49                bb_progress_init(&G.pmt);
  50        }
  51
  52        bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred,
  53                           G.chunked ? 0 : G.content_len + G.beg_range);
  54
  55        if (flag == 0) {
  56                /* last call to progress_meter */
  57                alarm(0);
  58                fputc('\n', stderr);
  59                G.transferred = 0;
  60        } else {
  61                if (flag == -1) { /* first call to progress_meter */
  62                        signal_SA_RESTART_empty_mask(SIGALRM, progress_meter);
  63                }
  64                alarm(1);
  65        }
  66
  67        errno = save_errno;
  68}
  69
  70#else /* FEATURE_WGET_STATUSBAR */
  71
  72static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
  73
  74#endif
  75
  76
  77/* IPv6 knows scoped address types i.e. link and site local addresses. Link
  78 * local addresses can have a scope identifier to specify the
  79 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
  80 * identifier is only valid on a single node.
  81 *
  82 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
  83 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
  84 * in the Host header as invalid requests, see
  85 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
  86 */
  static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* The '%' must sit inside the brackets. If it follows the ']'
	 * (e.g. "[::1]:8%0") there is no zone id to strip, and copying
	 * "]..." forward onto a later position would write past the end
	 * of the string. */
	if (scope > cp)
		return;

	/* cp points to "]...", scope points to "%eth0]...":
	 * slide the tail (including its NUL) down over the zone id */
	memmove(scope, cp, strlen(cp) + 1);
}
 112
 113/* Read NMEMB bytes into PTR from STREAM.  Returns the number of bytes read,
 114 * and a short count if an eof or non-interrupt error is encountered.  */
 static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	char *const start = ptr;
	size_t total = 0;

	for (;;) {
		clearerr(stream);
		errno = 0;
		total += fread(start + total, 1, nmemb - total, stream);

		/* Done once everything requested has arrived... */
		if (total == nmemb)
			break;
		/* ...or when the short read was not caused by EINTR */
		if (!ferror(stream) || errno != EINTR)
			break;
	}

	return total;
}
 130
 131/* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM.
 132 * Returns S, or NULL if an eof or non-interrupt error is encountered.  */
 static char *safe_fgets(char *s, int size, FILE *stream)
{
	for (;;) {
		char *line;

		clearerr(stream);
		errno = 0;
		line = fgets(s, size, stream);
		if (line != NULL)
			return line;
		/* Retry only when the failure was an interrupted read */
		if (!ferror(stream) || errno != EINTR)
			return NULL;
	}
}
 145
 146#if ENABLE_FEATURE_WGET_AUTHENTICATION
 147/* Base64-encode character string. buf is assumed to be char buf[512]. */
 148static char *base64enc_512(char buf[512], const char *str)
 149{
 150        unsigned len = strlen(str);
 151        if (len > 512/4*3 - 10) /* paranoia */
 152                len = 512/4*3 - 10;
 153        bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);
 154        return buf;
 155}
 156#endif
 157
 static char* sanitize_string(char *s)
{
	unsigned char *end;

	/* Cut the string at the first byte below ' ' (control chars or the NUL) */
	for (end = (unsigned char *) s; *end >= ' '; end++)
		continue;
	*end = '\0';

	return s;
}
 166
 167static FILE *open_socket(len_and_sockaddr *lsa)
 168{
 169        FILE *fp;
 170
 171        /* glibc 2.4 seems to try seeking on it - ??! */
 172        /* hopefully it understands what ESPIPE means... */
 173        fp = fdopen(xconnect_stream(lsa), "r+");
 174        if (fp == NULL)
 175                bb_perror_msg_and_die("fdopen");
 176
 177        return fp;
 178}
 179
 /*
 * Optionally send an FTP command, then wait for the final reply line.
 *
 * s1, s2: command and argument, sent as "<s1><s2>\r\n". If s1 is NULL
 *         nothing is sent; a NULL s2 is treated as "".
 * fp:     control connection.
 * buf:    receives the reply line; up to 509 characters are read into it,
 *         so it must be at least 510 bytes (callers here pass char[512]).
 *
 * Lines are read until one starts with a digit and has a space in the
 * fourth column. A trailing "\r\n" is cut off. Returns the numeric reply
 * code; on return buf holds "NNN text", so buf+4 is the message text.
 * Dies if no line can be read.
 */
static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
{
	int result;

	if (s1) {
		if (!s2)
			s2 = "";
		fprintf(fp, "%s%s\r\n", s1, s2);
		fflush(fp);
	}

	do {
		char *buf_ptr;

		if (fgets(buf, 510, fp) == NULL) {
			bb_perror_msg_and_die("error getting response");
		}
		buf_ptr = strstr(buf, "\r\n");
		if (buf_ptr) {
			*buf_ptr = '\0';
		}
		/* A line shorter than three characters cannot be a final reply.
		 * Checking buf[1] and buf[2] first keeps us from looking at
		 * buf[3] when it lies beyond what fgets() stored. */
	} while (!isdigit((unsigned char)buf[0])
	      || buf[1] == '\0'
	      || buf[2] == '\0'
	      || buf[3] != ' ');

	/* Temporarily terminate after the three-digit code to convert it */
	buf[3] = '\0';
	result = xatoi_u(buf);
	buf[3] = ' ';
	return result;
}
 206
 207static void parse_url(char *src_url, struct host_info *h)
 208{
 209        char *url, *p, *sp;
 210
 211        /* h->allocated = */ url = xstrdup(src_url);
 212
 213        if (strncmp(url, "http://", 7) == 0) {
 214                h->port = bb_lookup_port("http", "tcp", 80);
 215                h->host = url + 7;
 216                h->is_ftp = 0;
 217        } else if (strncmp(url, "ftp://", 6) == 0) {
 218                h->port = bb_lookup_port("ftp", "tcp", 21);
 219                h->host = url + 6;
 220                h->is_ftp = 1;
 221        } else
 222                bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
 223
 224        // FYI:
 225        // "Real" wget 'http://busybox.net?var=a/b' sends this request:
 226        //   'GET /?var=a/b HTTP 1.0'
 227        //   and saves 'index.html?var=a%2Fb' (we save 'b')
 228        // wget 'http://busybox.net?login=john@doe':
 229        //   request: 'GET /?login=john@doe HTTP/1.0'
 230        //   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
 231        // wget 'http://busybox.net#test/test':
 232        //   request: 'GET / HTTP/1.0'
 233        //   saves: 'index.html' (we save 'test')
 234        //
 235        // We also don't add unique .N suffix if file exists...
 236        sp = strchr(h->host, '/');
 237        p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
 238        p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
 239        if (!sp) {
 240                h->path = "";
 241        } else if (*sp == '/') {
 242                *sp = '\0';
 243                h->path = sp + 1;
 244        } else { // '#' or '?'
 245                // http://busybox.net?login=john@doe is a valid URL
 246                // memmove converts to:
 247                // http:/busybox.nett?login=john@doe...
 248                memmove(h->host - 1, h->host, sp - h->host);
 249                h->host--;
 250                sp[-1] = '\0';
 251                h->path = sp;
 252        }
 253
 254        // We used to set h->user to NULL here, but this interferes
 255        // with handling of code 302 ("object was moved")
 256
 257        sp = strrchr(h->host, '@');
 258        if (sp != NULL) {
 259                h->user = h->host;
 260                *sp = '\0';
 261                h->host = sp + 1;
 262        }
 263
 264        sp = h->host;
 265}
 266
 /*
 * Read one header line from fp into buf (at most bufsiz-1 characters).
 *
 * Returns a pointer (inside buf) to the header value with leading
 * whitespace skipped and the line terminator removed. On return buf itself
 * holds the header name, lowercased and NUL-terminated.
 * Returns NULL at end of input or on the empty line that ends the headers.
 * Dies if the header name is not followed by ':'.
 * If the line did not fit into buf, the rest of it is read and discarded.
 */
static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
{
	char *s, *hdrval;
	int c;

	/* *istrunc = 0; */

	/* retrieve header line */
	if (fgets(buf, bufsiz, fp) == NULL)
		return NULL;

	/* see if we are at the end of the headers */
	for (s = buf; *s == '\r'; ++s)
		continue;
	if (*s == '\n')
		return NULL;

	/* convert the header name to lower case.
	 * The bytes come from the network and may have the high bit set:
	 * the <ctype.h> functions must not be handed a negative char. */
	for (s = buf; isalnum((unsigned char)*s) || *s == '-' || *s == '.'; ++s)
		*s = tolower((unsigned char)*s);

	/* verify we are at the end of the header name */
	if (*s != ':')
		bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));

	/* locate the start of the header value */
	*s++ = '\0';
	hdrval = skip_whitespace(s);

	/* locate the end of header */
	while (*s && *s != '\r' && *s != '\n')
		++s;

	/* end of header found */
	if (*s) {
		*s = '\0';
		return hdrval;
	}

	/* Rats! The buffer isn't big enough to hold the entire header value */
	while (c = getc(fp), c != EOF && c != '\n')
		continue;
	/* *istrunc = 1; */
	return hdrval;
}
 312
 313#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 314static char *URL_escape(const char *str)
 315{
 316        /* URL encode, see RFC 2396 */
 317        char *dst;
 318        char *res = dst = xmalloc(strlen(str) * 3 + 1);
 319        unsigned char c;
 320
 321        while (1) {
 322                c = *str++;
 323                if (c == '\0'
 324                /* || strchr("!&'()*-.=_~", c) - more code */
 325                 || c == '!'
 326                 || c == '&'
 327                 || c == '\''
 328                 || c == '('
 329                 || c == ')'
 330                 || c == '*'
 331                 || c == '-'
 332                 || c == '.'
 333                 || c == '='
 334                 || c == '_'
 335                 || c == '~'
 336                 || (c >= '0' && c <= '9')
 337                 || ((c|0x20) >= 'a' && (c|0x20) <= 'z')
 338                ) {
 339                        *dst++ = c;
 340                        if (c == '\0')
 341                                return res;
 342                } else {
 343                        *dst++ = '%';
 344                        *dst++ = bb_hexdigits_upcase[c >> 4];
 345                        *dst++ = bb_hexdigits_upcase[c & 0xf];
 346                }
 347        }
 348}
 349#endif
 350
 351static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
 352{
 353        char buf[512];
 354        FILE *sfp;
 355        char *str;
 356        int port;
 357
 358        if (!target->user)
 359                target->user = xstrdup("anonymous:busybox@");
 360
 361        sfp = open_socket(lsa);
 362        if (ftpcmd(NULL, NULL, sfp, buf) != 220)
 363                bb_error_msg_and_die("%s", sanitize_string(buf+4));
 364
 365        /*
 366         * Splitting username:password pair,
 367         * trying to log in
 368         */
 369        str = strchr(target->user, ':');
 370        if (str)
 371                *str++ = '\0';
 372        switch (ftpcmd("USER ", target->user, sfp, buf)) {
 373        case 230:
 374                break;
 375        case 331:
 376                if (ftpcmd("PASS ", str, sfp, buf) == 230)
 377                        break;
 378                /* fall through (failed login) */
 379        default:
 380                bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
 381        }
 382
 383        ftpcmd("TYPE I", NULL, sfp, buf);
 384
 385        /*
 386         * Querying file size
 387         */
 388        if (ftpcmd("SIZE ", target->path, sfp, buf) == 213) {
 389                G.content_len = BB_STRTOOFF(buf+4, NULL, 10);
 390                if (G.content_len < 0 || errno) {
 391                        bb_error_msg_and_die("SIZE value is garbage");
 392                }
 393                G.got_clen = 1;
 394        }
 395
 396        /*
 397         * Entering passive mode
 398         */
 399        if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
 400 pasv_error:
 401                bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
 402        }
 403        // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
 404        // Server's IP is N1.N2.N3.N4 (we ignore it)
 405        // Server's port for data connection is P1*256+P2
 406        str = strrchr(buf, ')');
 407        if (str) str[0] = '\0';
 408        str = strrchr(buf, ',');
 409        if (!str) goto pasv_error;
 410        port = xatou_range(str+1, 0, 255);
 411        *str = '\0';
 412        str = strrchr(buf, ',');
 413        if (!str) goto pasv_error;
 414        port += xatou_range(str+1, 0, 255) * 256;
 415        set_nport(lsa, htons(port));
 416
 417        *dfpp = open_socket(lsa);
 418
 419        if (G.beg_range) {
 420                sprintf(buf, "REST %"OFF_FMT"u", G.beg_range);
 421                if (ftpcmd(buf, NULL, sfp, buf) == 350)
 422                        G.content_len -= G.beg_range;
 423        }
 424
 425        if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
 426                bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
 427
 428        return sfp;
 429}
 430
 431/* Must match option string! */
 432enum {
 433        WGET_OPT_CONTINUE   = (1 << 0),
 434        WGET_OPT_SPIDER     = (1 << 1),
 435        WGET_OPT_QUIET      = (1 << 2),
 436        WGET_OPT_OUTNAME    = (1 << 3),
 437        WGET_OPT_PREFIX     = (1 << 4),
 438        WGET_OPT_PROXY      = (1 << 5),
 439        WGET_OPT_USER_AGENT = (1 << 6),
 440        WGET_OPT_RETRIES    = (1 << 7),
 441        WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 8),
 442        WGET_OPT_PASSIVE    = (1 << 9),
 443        WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
 444        WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
 445};
 446
 447static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
 448{
 449        char buf[512];
 450
 451        if (!(option_mask32 & WGET_OPT_QUIET))
 452                progress_meter(-1);
 453
 454        if (G.chunked)
 455                goto get_clen;
 456
 457        /* Loops only if chunked */
 458        while (1) {
 459                while (1) {
 460                        int n;
 461                        unsigned rdsz;
 462
 463                        rdsz = sizeof(buf);
 464                        if (G.got_clen) {
 465                                if (G.content_len < (off_t)sizeof(buf)) {
 466                                        if ((int)G.content_len <= 0)
 467                                                break;
 468                                        rdsz = (unsigned)G.content_len;
 469                                }
 470                        }
 471                        n = safe_fread(buf, rdsz, dfp);
 472                        if (n <= 0) {
 473                                if (ferror(dfp)) {
 474                                        /* perror will not work: ferror doesn't set errno */
 475                                        bb_error_msg_and_die(bb_msg_read_error);
 476                                }
 477                                break;
 478                        }
 479                        xwrite(output_fd, buf, n);
 480#if ENABLE_FEATURE_WGET_STATUSBAR
 481                        G.transferred += n;
 482#endif
 483                        if (G.got_clen)
 484                                G.content_len -= n;
 485                }
 486
 487                if (!G.chunked)
 488                        break;
 489
 490                safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
 491 get_clen:
 492                safe_fgets(buf, sizeof(buf), dfp);
 493                G.content_len = STRTOOFF(buf, NULL, 16);
 494                /* FIXME: error check? */
 495                if (G.content_len == 0)
 496                        break; /* all done! */
 497                G.got_clen = 1;
 498        }
 499
 500        if (!(option_mask32 & WGET_OPT_QUIET))
 501                progress_meter(0);
 502}
 503
 504int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
 505int wget_main(int argc UNUSED_PARAM, char **argv)
 506{
 507        char buf[512];
 508        struct host_info server, target;
 509        len_and_sockaddr *lsa;
 510        unsigned opt;
 511        int redir_limit;
 512        char *proxy = NULL;
 513        char *dir_prefix = NULL;
 514#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 515        char *post_data;
 516        char *extra_headers = NULL;
 517        llist_t *headers_llist = NULL;
 518#endif
 519        FILE *sfp;                      /* socket to web/ftp server         */
 520        FILE *dfp;                      /* socket to ftp server (data)      */
 521        char *fname_out;                /* where to direct output (-O)      */
 522        int output_fd = -1;
 523        bool use_proxy;                 /* Use proxies if env vars are set  */
 524        const char *proxy_flag = "on";  /* Use proxies if env vars are set  */
 525        const char *user_agent = "Wget";/* "User-Agent" header field        */
 526
 527        static const char keywords[] ALIGN1 =
 528                "content-length\0""transfer-encoding\0""chunked\0""location\0";
 529        enum {
 530                KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
 531        };
 532#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 533        static const char wget_longopts[] ALIGN1 =
 534                /* name, has_arg, val */
 535                "continue\0"         No_argument       "c"
 536                "spider\0"           No_argument       "s"
 537                "quiet\0"            No_argument       "q"
 538                "output-document\0"  Required_argument "O"
 539                "directory-prefix\0" Required_argument "P"
 540                "proxy\0"            Required_argument "Y"
 541                "user-agent\0"       Required_argument "U"
 542                /* Ignored: */
 543                // "tries\0"            Required_argument "t"
 544                // "timeout\0"          Required_argument "T"
 545                /* Ignored (we always use PASV): */
 546                "passive-ftp\0"      No_argument       "\xff"
 547                "header\0"           Required_argument "\xfe"
 548                "post-data\0"        Required_argument "\xfd"
 549                ;
 550#endif
 551
 552        INIT_G();
 553
 554#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 555        applet_long_options = wget_longopts;
 556#endif
 557        /* server.allocated = target.allocated = NULL; */
 558        opt_complementary = "-1" IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
 559        opt = getopt32(argv, "csqO:P:Y:U:" /*ignored:*/ "t:T:",
 560                                &fname_out, &dir_prefix,
 561                                &proxy_flag, &user_agent,
 562                                NULL, /* -t RETRIES */
 563                                NULL /* -T NETWORK_READ_TIMEOUT */
 564                                IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
 565                                IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
 566                                );
 567#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 568        if (headers_llist) {
 569                int size = 1;
 570                char *cp;
 571                llist_t *ll = headers_llist;
 572                while (ll) {
 573                        size += strlen(ll->data) + 2;
 574                        ll = ll->link;
 575                }
 576                extra_headers = cp = xmalloc(size);
 577                while (headers_llist) {
 578                        cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
 579                }
 580        }
 581#endif
 582
 583        /* TODO: compat issue: should handle "wget URL1 URL2..." */
 584
 585        target.user = NULL;
 586        parse_url(argv[optind], &target);
 587
 588        /* Use the proxy if necessary */
 589        use_proxy = (strcmp(proxy_flag, "off") != 0);
 590        if (use_proxy) {
 591                proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
 592                if (proxy && proxy[0]) {
 593                        parse_url(proxy, &server);
 594                } else {
 595                        use_proxy = 0;
 596                }
 597        }
 598        if (!use_proxy) {
 599                server.port = target.port;
 600                if (ENABLE_FEATURE_IPV6) {
 601                        server.host = xstrdup(target.host);
 602                } else {
 603                        server.host = target.host;
 604                }
 605        }
 606
 607        if (ENABLE_FEATURE_IPV6)
 608                strip_ipv6_scope_id(target.host);
 609
 610        /* Guess an output filename, if there was no -O FILE */
 611        if (!(opt & WGET_OPT_OUTNAME)) {
 612                fname_out = bb_get_last_path_component_nostrip(target.path);
 613                /* handle "wget http://kernel.org//" */
 614                if (fname_out[0] == '/' || !fname_out[0])
 615                        fname_out = (char*)"index.html";
 616                /* -P DIR is considered only if there was no -O FILE */
 617                if (dir_prefix)
 618                        fname_out = concat_path_file(dir_prefix, fname_out);
 619        } else {
 620                if (LONE_DASH(fname_out)) {
 621                        /* -O - */
 622                        output_fd = 1;
 623                        opt &= ~WGET_OPT_CONTINUE;
 624                }
 625        }
 626#if ENABLE_FEATURE_WGET_STATUSBAR
 627        G.curfile = bb_get_last_path_component_nostrip(fname_out);
 628#endif
 629
 630        /* Impossible?
 631        if ((opt & WGET_OPT_CONTINUE) && !fname_out)
 632                bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
 633        */
 634
 635        /* Determine where to start transfer */
 636        if (opt & WGET_OPT_CONTINUE) {
 637                output_fd = open(fname_out, O_WRONLY);
 638                if (output_fd >= 0) {
 639                        G.beg_range = xlseek(output_fd, 0, SEEK_END);
 640                }
 641                /* File doesn't exist. We do not create file here yet.
 642                 * We are not sure it exists on remove side */
 643        }
 644
 645        redir_limit = 5;
 646 resolve_lsa:
 647        lsa = xhost2sockaddr(server.host, server.port);
 648        if (!(opt & WGET_OPT_QUIET)) {
 649                char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
 650                fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
 651                free(s);
 652        }
 653 establish_session:
 654        if (use_proxy || !target.is_ftp) {
 655                /*
 656                 *  HTTP session
 657                 */
 658                char *str;
 659                int status;
 660
 661                /* Open socket to http server */
 662                sfp = open_socket(lsa);
 663
 664                /* Send HTTP request */
 665                if (use_proxy) {
 666                        fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
 667                                target.is_ftp ? "f" : "ht", target.host,
 668                                target.path);
 669                } else {
 670                        if (opt & WGET_OPT_POST_DATA)
 671                                fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
 672                        else
 673                                fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
 674                }
 675
 676                fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
 677                        target.host, user_agent);
 678
 679#if ENABLE_FEATURE_WGET_AUTHENTICATION
 680                if (target.user) {
 681                        fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
 682                                base64enc_512(buf, target.user));
 683                }
 684                if (use_proxy && server.user) {
 685                        fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
 686                                base64enc_512(buf, server.user));
 687                }
 688#endif
 689
 690                if (G.beg_range)
 691                        fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
 692#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 693                if (extra_headers)
 694                        fputs(extra_headers, sfp);
 695
 696                if (opt & WGET_OPT_POST_DATA) {
 697                        char *estr = URL_escape(post_data);
 698                        fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
 699                        fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
 700                                        (int) strlen(estr), estr);
 701                        /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
 702                        /*fprintf(sfp, "%s\r\n", estr);*/
 703                        free(estr);
 704                } else
 705#endif
 706                { /* If "Connection:" is needed, document why */
 707                        fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
 708                }
 709
 710                /*
 711                 * Retrieve HTTP response line and check for "200" status code.
 712                 */
 713 read_response:
 714                if (fgets(buf, sizeof(buf), sfp) == NULL)
 715                        bb_error_msg_and_die("no response from server");
 716
 717                str = buf;
 718                str = skip_non_whitespace(str);
 719                str = skip_whitespace(str);
 720                // FIXME: no error check
 721                // xatou wouldn't work: "200 OK"
 722                status = atoi(str);
 723                switch (status) {
 724                case 0:
 725                case 100:
 726                        while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
 727                                /* eat all remaining headers */;
 728                        goto read_response;
 729                case 200:
 730/*
 731Response 204 doesn't say "null file", it says "metadata
 732has changed but data didn't":
 733
 734"10.2.5 204 No Content
 735The server has fulfilled the request but does not need to return
 736an entity-body, and might want to return updated metainformation.
 737The response MAY include new or updated metainformation in the form
 738of entity-headers, which if present SHOULD be associated with
 739the requested variant.
 740
 741If the client is a user agent, it SHOULD NOT change its document
 742view from that which caused the request to be sent. This response
 743is primarily intended to allow input for actions to take place
 744without causing a change to the user agent's active document view,
 745although any new or updated metainformation SHOULD be applied
 746to the document currently in the user agent's active view.
 747
 748The 204 response MUST NOT include a message-body, and thus
 749is always terminated by the first empty line after the header fields."
 750
 751However, in real world it was observed that some web servers
 752(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
 753*/
 754                case 204:
 755                        break;
 756                case 300:       /* redirection */
 757                case 301:
 758                case 302:
 759                case 303:
 760                        break;
 761                case 206:
 762                        if (G.beg_range)
 763                                break;
 764                        /* fall through */
 765                default:
 766                        bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
 767                }
 768
 769                /*
 770                 * Retrieve HTTP headers.
 771                 */
 772                while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
 773                        /* gethdr converted "FOO:" string to lowercase */
 774                        smalluint key;
 775                        /* strip trailing whitespace */
 776                        char *s = strchrnul(str, '\0') - 1;
 777                        while (s >= str && (*s == ' ' || *s == '\t')) {
 778                                *s = '\0';
 779                                s--;
 780                        }
 781                        key = index_in_strings(keywords, buf) + 1;
 782                        if (key == KEY_content_length) {
 783                                G.content_len = BB_STRTOOFF(str, NULL, 10);
 784                                if (G.content_len < 0 || errno) {
 785                                        bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
 786                                }
 787                                G.got_clen = 1;
 788                                continue;
 789                        }
 790                        if (key == KEY_transfer_encoding) {
 791                                if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
 792                                        bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
 793                                G.chunked = G.got_clen = 1;
 794                        }
 795                        if (key == KEY_location && status >= 300) {
 796                                if (--redir_limit == 0)
 797                                        bb_error_msg_and_die("too many redirections");
 798                                fclose(sfp);
 799                                G.got_clen = 0;
 800                                G.chunked = 0;
 801                                if (str[0] == '/')
 802                                        /* free(target.allocated); */
 803                                        target.path = /* target.allocated = */ xstrdup(str+1);
 804                                        /* lsa stays the same: it's on the same server */
 805                                else {
 806                                        parse_url(str, &target);
 807                                        if (!use_proxy) {
 808                                                server.host = target.host;
 809                                                /* strip_ipv6_scope_id(target.host); - no! */
 810                                                /* we assume remote never gives us IPv6 addr with scope id */
 811                                                server.port = target.port;
 812                                                free(lsa);
 813                                                goto resolve_lsa;
 814                                        } /* else: lsa stays the same: we use proxy */
 815                                }
 816                                goto establish_session;
 817                        }
 818                }
 819//              if (status >= 300)
 820//                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
 821
 822                /* For HTTP, data is pumped over the same connection */
 823                dfp = sfp;
 824
 825        } else {
 826                /*
 827                 *  FTP session
 828                 */
 829                sfp = prepare_ftp_session(&dfp, &target, lsa);
 830        }
 831
 832        if (opt & WGET_OPT_SPIDER) {
 833                if (ENABLE_FEATURE_CLEAN_UP)
 834                        fclose(sfp);
 835                return EXIT_SUCCESS;
 836        }
 837
 838        if (output_fd < 0) {
 839                int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
 840                /* compat with wget: -O FILE can overwrite */
 841                if (opt & WGET_OPT_OUTNAME)
 842                        o_flags = O_WRONLY | O_CREAT | O_TRUNC;
 843                output_fd = xopen(fname_out, o_flags);
 844        }
 845
 846        retrieve_file_data(dfp, output_fd);
 847        xclose(output_fd);
 848
 849        if (dfp != sfp) {
 850                /* It's ftp. Close it properly */
 851                fclose(dfp);
 852                if (ftpcmd(NULL, NULL, sfp, buf) != 226)
 853                        bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
 854                /* ftpcmd("QUIT", NULL, sfp, buf); - why bother? */
 855        }
 856
 857        return EXIT_SUCCESS;
 858}
 859