busybox/networking/wget.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * wget - retrieve a file using HTTP or FTP
   4 *
   5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
   6 * Licensed under GPLv2, see file LICENSE in this source tree.
   7 *
   8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
   9 * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
  10 */
  11//config:config WGET
  12//config:       bool "wget (35 kb)"
  13//config:       default y
  14//config:       help
  15//config:       wget is a utility for non-interactive download of files from HTTP
  16//config:       and FTP servers.
  17//config:
  18//config:config FEATURE_WGET_LONG_OPTIONS
  19//config:       bool "Enable long options"
  20//config:       default y
  21//config:       depends on WGET && LONG_OPTS
  22//config:
  23//config:config FEATURE_WGET_STATUSBAR
  24//config:       bool "Enable progress bar (+2k)"
  25//config:       default y
  26//config:       depends on WGET
  27//config:
  28//config:config FEATURE_WGET_AUTHENTICATION
  29//config:       bool "Enable HTTP authentication"
  30//config:       default y
  31//config:       depends on WGET
  32//config:       help
  33//config:       Support authenticated HTTP transfers.
  34//config:
  35//config:config FEATURE_WGET_TIMEOUT
  36//config:       bool "Enable timeout option -T SEC"
  37//config:       default y
  38//config:       depends on WGET
  39//config:       help
  40//config:       Supports network read and connect timeouts for wget,
  41//config:       so that wget will give up and timeout, through the -T
  42//config:       command line option.
  43//config:
  44//config:       Currently only connect and network data read timeout are
  45//config:       supported (i.e., timeout is not applied to the DNS query). When
  46//config:       FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
  47//config:       will work in addition to -T.
  48//config:
  49//config:config FEATURE_WGET_HTTPS
  50//config:       bool "Support HTTPS using internal TLS code"
  51//it also enables FTPS support, but it's not well tested yet
  52//config:       default y
  53//config:       depends on WGET
  54//config:       select TLS
  55//config:       help
  56//config:       wget will use internal TLS code to connect to https:// URLs.
  57//config:       Note:
  58//config:       On NOMMU machines, ssl_helper applet should be available
  59//config:       in the $PATH for this to work. Make sure to select that applet.
  60//config:
  61//config:       Note: currently, TLS code only makes TLS I/O work, it
  62//config:       does *not* check that the peer is who it claims to be, etc.
  63//config:       IOW: it uses peer-supplied public keys to establish encryption
  64//config:       and signing keys, then encrypts and signs outgoing data and
  65//config:       decrypts incoming data.
  66//config:       It does not check signature hashes on the incoming data:
  67//config:       this means that attackers manipulating TCP packets can
  68//config:       send altered data and we unknowingly receive garbage.
  69//config:       (This check might be relatively easy to add).
  70//config:       It does not check public key's certificate:
  71//config:       this means that the peer may be an attacker impersonating
  72//config:       the server we think we are talking to.
  73//config:
  74//config:       If you think this is unacceptable, consider this. As more and more
  75//config:       servers switch to HTTPS-only operation, without such "crippled"
  76//config:       TLS code it is *impossible* to simply download a kernel source
  77//config:       from kernel.org. Which can in real world translate into
  78//config:       "my small automatic tooling to build cross-compilers from sources
  79//config:       no longer works, I need to additionally keep a local copy
  80//config:       of ~4 megabyte source tarball of a SSL library and ~2 megabyte
  81//config:       source of wget, need to compile and build both before I can
  82//config:       download anything. All this despite the fact that the build
  83//config:       is done in a QEMU sandbox on a machine with absolutely nothing
  84//config:       worth stealing, so I don't care if someone would go to a lot
  85//config:       of trouble to intercept my HTTPS download to send me an altered
  86//config:       kernel tarball".
  87//config:
  88//config:       If you still think this is unacceptable, send patches.
  89//config:
  90//config:       If you still think this is unacceptable, do not want to send
  91//config:       patches, but do want to waste bandwidth explaining how wrong
  92//config:       it is, you will be ignored.
  93//config:
  94//config:config FEATURE_WGET_OPENSSL
  95//config:       bool "Try to connect to HTTPS using openssl"
  96//config:       default y
  97//config:       depends on WGET
  98//config:       help
  99//config:       Try to use openssl to handle HTTPS.
 100//config:
 101//config:       OpenSSL has a simple SSL client for debug purposes.
 102//config:       If you select this option, wget will effectively run:
 103//config:       "openssl s_client -quiet -connect hostname:443
 104//config:       -servername hostname 2>/dev/null" and pipe its data
 105//config:       through it. -servername is not used if hostname is numeric.
 106//config:       Note inconvenient API: host resolution is done twice,
 107//config:       and there is no guarantee openssl's idea of IPv6 address
 108//config:       format is the same as ours.
 109//config:       Another problem is that s_client prints debug information
 110//config:       to stderr, and it needs to be suppressed. This means
 111//config:       all error messages get suppressed too.
 112//config:       openssl is also a big binary, often dynamically linked
 113//config:       against ~15 libraries.
 114//config:
 115//config:       If openssl can't be executed, internal TLS code will be used
 116//config:       (if you enabled it); if openssl can be executed but fails later,
 117//config:       wget can't detect this, and download will fail.
 118
 119//applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
 120
 121//kbuild:lib-$(CONFIG_WGET) += wget.o
 122
 123//usage:#define wget_trivial_usage
 124//usage:        IF_FEATURE_WGET_LONG_OPTIONS(
 125//usage:       "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n"
 126//usage:       "        [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
 127/* Since we ignore these opts, we don't show them in --help */
 128/* //usage:    "        [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
 129/* //usage:    "        [-nv] [-nc] [-nH] [-np]" */
 130//usage:       "        [-S|--server-response] [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
 131//usage:        )
 132//usage:        IF_NOT_FEATURE_WGET_LONG_OPTIONS(
 133//usage:       "[-cq] [-O FILE] [-Y on/off] [-P DIR] [-S] [-U AGENT]"
 134//usage:                        IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
 135//usage:        )
 136//usage:#define wget_full_usage "\n\n"
 137//usage:       "Retrieve files via HTTP or FTP\n"
 138//usage:        IF_FEATURE_WGET_LONG_OPTIONS(
 139//usage:     "\n        --spider        Only check URL existence: $? is 0 if exists"
 140///////:     "\n        --no-check-certificate  Don't validate the server's certificate"
 141//usage:        )
 142//usage:     "\n        -c              Continue retrieval of aborted transfer"
 143//usage:     "\n        -q              Quiet"
 144//usage:     "\n        -P DIR          Save to DIR (default .)"
 145//usage:     "\n        -S              Show server response"
 146//usage:        IF_FEATURE_WGET_TIMEOUT(
 147//usage:     "\n        -T SEC          Network read timeout is SEC seconds"
 148//usage:        )
 149//usage:     "\n        -O FILE         Save to FILE ('-' for stdout)"
 150//usage:     "\n        -U STR          Use STR for User-Agent header"
 151//usage:     "\n        -Y on/off       Use proxy"
 152
 153#include "libbb.h"
 154
 155#if 0
 156# define log_io(...) bb_error_msg(__VA_ARGS__)
 157# define SENDFMT(fp, fmt, ...) \
 158        do { \
 159                log_io("> " fmt, ##__VA_ARGS__); \
 160                fprintf(fp, fmt, ##__VA_ARGS__); \
 161        } while (0);
 162#else
 163# define log_io(...) ((void)0)
 164# define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
 165#endif
 166
 167
 168#define SSL_SUPPORTED (ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_HTTPS)
 169
 170struct host_info {
 171        char *allocated;
 172        const char *path;
 173        char       *user;
 174        const char *protocol;
 175        char       *host;
 176        int         port;
 177};
 178static const char P_FTP[] ALIGN1 = "ftp";
 179static const char P_HTTP[] ALIGN1 = "http";
 180#if SSL_SUPPORTED
 181# if ENABLE_FEATURE_WGET_HTTPS
 182static const char P_FTPS[] ALIGN1 = "ftps";
 183# endif
 184static const char P_HTTPS[] ALIGN1 = "https";
 185#endif
 186
 187#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 188/* User-specified headers prevent using our corresponding built-in headers.  */
 189enum {
 190        HDR_HOST          = (1<<0),
 191        HDR_USER_AGENT    = (1<<1),
 192        HDR_RANGE         = (1<<2),
 193        HDR_AUTH          = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
 194        HDR_PROXY_AUTH    = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
 195};
 196static const char wget_user_headers[] ALIGN1 =
 197        "Host:\0"
 198        "User-Agent:\0"
 199        "Range:\0"
 200# if ENABLE_FEATURE_WGET_AUTHENTICATION
 201        "Authorization:\0"
 202        "Proxy-Authorization:\0"
 203# endif
 204        ;
 205# define USR_HEADER_HOST       (G.user_headers & HDR_HOST)
 206# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
 207# define USR_HEADER_RANGE      (G.user_headers & HDR_RANGE)
 208# define USR_HEADER_AUTH       (G.user_headers & HDR_AUTH)
 209# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
 210#else /* No long options, no user-headers :( */
 211# define USR_HEADER_HOST       0
 212# define USR_HEADER_USER_AGENT 0
 213# define USR_HEADER_RANGE      0
 214# define USR_HEADER_AUTH       0
 215# define USR_HEADER_PROXY_AUTH 0
 216#endif
 217
 218/* Globals */
 219struct globals {
 220        off_t content_len;        /* Content-length of the file */
 221        off_t beg_range;          /* Range at which continue begins */
 222#if ENABLE_FEATURE_WGET_STATUSBAR
 223        off_t transferred;        /* Number of bytes transferred so far */
 224        const char *curfile;      /* Name of current file being transferred */
 225        bb_progress_t pmt;
 226#endif
 227        char *dir_prefix;
 228#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 229        char *post_data;
 230        char *extra_headers;
 231        unsigned char user_headers; /* Headers mentioned by the user */
 232#endif
 233        char *fname_out;        /* where to direct output (-O) */
 234        const char *proxy_flag; /* Use proxies if env vars are set */
 235        const char *user_agent; /* "User-Agent" header field */
 236#if ENABLE_FEATURE_WGET_TIMEOUT
 237        unsigned timeout_seconds;
 238        bool die_if_timed_out;
 239#endif
 240        int output_fd;
 241        int o_flags;
 242        smallint chunked;         /* chunked transfer encoding */
 243        smallint got_clen;        /* got content-length: from server  */
 244        /* Local downloads do benefit from big buffer.
 245         * With 512 byte buffer, it was measured to be
 246         * an order of magnitude slower than with big one.
 247         */
 248        uint64_t just_to_align_next_member;
 249        char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
 250} FIX_ALIASING;
 251#define G (*ptr_to_globals)
 252#define INIT_G() do { \
 253        SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
 254} while (0)
 255#define FINI_G() do { \
 256        FREE_PTR_TO_GLOBALS(); \
 257} while (0)
 258
 259
 260/* Must match option string! */
 261enum {
 262        WGET_OPT_CONTINUE   = (1 << 0),
 263        WGET_OPT_QUIET      = (1 << 1),
 264        WGET_OPT_SERVER_RESPONSE = (1 << 2),
 265        WGET_OPT_OUTNAME    = (1 << 3),
 266        WGET_OPT_PREFIX     = (1 << 4),
 267        WGET_OPT_PROXY      = (1 << 5),
 268        WGET_OPT_USER_AGENT = (1 << 6),
 269        WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
 270        WGET_OPT_RETRIES    = (1 << 8),
 271        WGET_OPT_nsomething = (1 << 9),
 272        WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
 273        WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
 274        WGET_OPT_SPIDER     = (1 << 12) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
 275        WGET_OPT_NO_CHECK_CERT = (1 << 13) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
 276};
 277
 278enum {
 279        PROGRESS_START = -1,
 280        PROGRESS_END   = 0,
 281        PROGRESS_BUMP  = 1,
 282};
 283#if ENABLE_FEATURE_WGET_STATUSBAR
 284static void progress_meter(int flag)
 285{
 286        if (option_mask32 & WGET_OPT_QUIET)
 287                return;
 288
 289        if (flag == PROGRESS_START)
 290                bb_progress_init(&G.pmt, G.curfile);
 291
 292        bb_progress_update(&G.pmt,
 293                        G.beg_range,
 294                        G.transferred,
 295                        (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
 296        );
 297
 298        if (flag == PROGRESS_END) {
 299                bb_progress_free(&G.pmt);
 300                bb_putchar_stderr('\n');
 301                G.transferred = 0;
 302        }
 303}
 304#else
 305static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
 306#endif
 307
 308
 309/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 310 * local addresses can have a scope identifier to specify the
 311 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 312 * identifier is only valid on a single node.
 313 *
 314 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 315 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 316 * in the Host header as invalid requests, see
 317 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 318 */
 319static void strip_ipv6_scope_id(char *host)
 320{
 321        char *scope, *cp;
 322
 323        /* bbox wget actually handles IPv6 addresses without [], like
 324         * wget "http://::1/xxx", but this is not standard.
 325         * To save code, _here_ we do not support it. */
 326
 327        if (host[0] != '[')
 328                return; /* not IPv6 */
 329
 330        scope = strchr(host, '%');
 331        if (!scope)
 332                return;
 333
 334        /* Remove the IPv6 zone identifier from the host address */
 335        cp = strchr(host, ']');
 336        if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
 337                /* malformed address (not "[xx]:nn" or "[xx]") */
 338                return;
 339        }
 340
 341        /* cp points to "]...", scope points to "%eth0]..." */
 342        overlapping_strcpy(scope, cp);
 343}
 344
 345#if ENABLE_FEATURE_WGET_AUTHENTICATION
 346/* Base64-encode character string. */
 347static char *base64enc(const char *str)
 348{
 349        unsigned len = strlen(str);
 350        if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
 351                len = sizeof(G.wget_buf)/4*3 - 10;
 352        bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
 353        return G.wget_buf;
 354}
 355#endif
 356
 357#if ENABLE_FEATURE_WGET_TIMEOUT
 358static void alarm_handler(int sig UNUSED_PARAM)
 359{
 360        /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
 361        if (G.die_if_timed_out)
 362                bb_error_msg_and_die("download timed out");
 363}
 364static void set_alarm(void)
 365{
 366        if (G.timeout_seconds) {
 367                alarm(G.timeout_seconds);
 368                G.die_if_timed_out = 1;
 369        }
 370}
 371# define clear_alarm() ((void)(G.die_if_timed_out = 0))
 372#else
 373# define set_alarm()   ((void)0)
 374# define clear_alarm() ((void)0)
 375#endif
 376
 377#if ENABLE_FEATURE_WGET_OPENSSL
 378/*
 379 * is_ip_address() attempts to verify whether or not a string
 380 * contains an IPv4 or IPv6 address (vs. an FQDN).  The result
 381 * of inet_pton() can be used to determine this.
 382 *
 383 * TODO add proper error checking when inet_pton() returns -1
 384 * (some form of system error has occurred, and errno is set)
 385 */
 386static int is_ip_address(const char *string)
 387{
 388        struct sockaddr_in sa;
 389
 390        int result = inet_pton(AF_INET, string, &(sa.sin_addr));
 391# if ENABLE_FEATURE_IPV6
 392        if (result == 0) {
 393                struct sockaddr_in6 sa6;
 394                result = inet_pton(AF_INET6, string, &(sa6.sin6_addr));
 395        }
 396# endif
 397        return (result == 1);
 398}
 399#endif
 400
 401static FILE *open_socket(len_and_sockaddr *lsa)
 402{
 403        int fd;
 404        FILE *fp;
 405
 406        set_alarm();
 407        fd = xconnect_stream(lsa);
 408        clear_alarm();
 409
 410        /* glibc 2.4 seems to try seeking on it - ??! */
 411        /* hopefully it understands what ESPIPE means... */
 412        fp = fdopen(fd, "r+");
 413        if (!fp)
 414                bb_die_memory_exhausted();
 415
 416        return fp;
 417}
 418
 419/* We balk at any control chars in other side's messages.
 420 * This prevents nasty surprises (e.g. ESC sequences) in "Location:" URLs
 421 * and error messages.
 422 *
 423 * The only exception is tabs, which are converted to (one) space:
 424 * HTTP's "headers: <whitespace> values" may have those.
 425 */
 426static char* sanitize_string(char *s)
 427{
 428        unsigned char *p = (void *) s;
 429        while (*p) {
 430                if (*p < ' ') {
 431                        if (*p != '\t')
 432                                break;
 433                        *p = ' ';
 434                }
 435                p++;
 436        }
 437        *p = '\0';
 438        return s;
 439}
 440
 441/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
 442static char fgets_trim_sanitize(FILE *fp, const char *fmt)
 443{
 444        char c;
 445        char *buf_ptr;
 446
 447        set_alarm();
 448        if (fgets(G.wget_buf, sizeof(G.wget_buf), fp) == NULL)
 449                bb_perror_msg_and_die("error getting response");
 450        clear_alarm();
 451
 452        buf_ptr = strchrnul(G.wget_buf, '\n');
 453        c = *buf_ptr;
 454#if 1
 455        /* Disallow any control chars: trim at first char < 0x20 */
 456        sanitize_string(G.wget_buf);
 457#else
 458        *buf_ptr = '\0';
 459        buf_ptr = strchrnul(G.wget_buf, '\r');
 460        *buf_ptr = '\0';
 461#endif
 462
 463        log_io("< %s", G.wget_buf);
 464
 465        if (fmt && (option_mask32 & WGET_OPT_SERVER_RESPONSE))
 466                fprintf(stderr, fmt, G.wget_buf);
 467
 468        return c;
 469}
 470
 471static int ftpcmd(const char *s1, const char *s2, FILE *fp)
 472{
 473        int result;
 474        if (s1) {
 475                if (!s2)
 476                        s2 = "";
 477                fprintf(fp, "%s%s\r\n", s1, s2);
 478                /* With --server-response, wget also shows its ftp commands */
 479                if (option_mask32 & WGET_OPT_SERVER_RESPONSE)
 480                        fprintf(stderr, "--> %s%s\n\n", s1, s2);
 481                fflush(fp);
 482                log_io("> %s%s", s1, s2);
 483        }
 484
 485        /* Read until "Nxx something" is received */
 486        G.wget_buf[3] = 0;
 487        do {
 488                fgets_trim_sanitize(fp, "%s\n");
 489        } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
 490
 491        G.wget_buf[3] = '\0';
 492        result = xatoi_positive(G.wget_buf);
 493        G.wget_buf[3] = ' ';
 494        return result;
 495}
 496
 497static void parse_url(const char *src_url, struct host_info *h)
 498{
 499        char *url, *p, *sp;
 500
 501        free(h->allocated);
 502        h->allocated = url = xstrdup(src_url);
 503
 504        h->protocol = P_FTP;
 505        p = strstr(url, "://");
 506        if (p) {
 507                *p = '\0';
 508                h->host = p + 3;
 509                if (strcmp(url, P_FTP) == 0) {
 510                        h->port = bb_lookup_std_port(P_FTP, "tcp", 21);
 511                } else
 512#if SSL_SUPPORTED
 513# if ENABLE_FEATURE_WGET_HTTPS
 514                if (strcmp(url, P_FTPS) == 0) {
 515                        h->port = bb_lookup_std_port(P_FTPS, "tcp", 990);
 516                        h->protocol = P_FTPS;
 517                } else
 518# endif
 519                if (strcmp(url, P_HTTPS) == 0) {
 520                        h->port = bb_lookup_std_port(P_HTTPS, "tcp", 443);
 521                        h->protocol = P_HTTPS;
 522                } else
 523#endif
 524                if (strcmp(url, P_HTTP) == 0) {
 525 http:
 526                        h->port = bb_lookup_std_port(P_HTTP, "tcp", 80);
 527                        h->protocol = P_HTTP;
 528                } else {
 529                        *p = ':';
 530                        bb_error_msg_and_die("not an http or ftp url: %s", url);
 531                }
 532        } else {
 533                // GNU wget is user-friendly and falls back to http://
 534                h->host = url;
 535                goto http;
 536        }
 537
 538        // FYI:
 539        // "Real" wget 'http://busybox.net?var=a/b' sends this request:
 540        //   'GET /?var=a/b HTTP/1.0'
 541        //   and saves 'index.html?var=a%2Fb' (we save 'b')
 542        // wget 'http://busybox.net?login=john@doe':
 543        //   request: 'GET /?login=john@doe HTTP/1.0'
 544        //   saves: 'index.html?login=john@doe' (we save 'login=john@doe')
 545        // wget 'http://busybox.net#test/test':
 546        //   request: 'GET / HTTP/1.0'
 547        //   saves: 'index.html' (we save 'test')
 548        //
 549        // We also don't add unique .N suffix if file exists...
 550        sp = strchr(h->host, '/');
 551        p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
 552        p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
 553        if (!sp) {
 554                h->path = "";
 555        } else if (*sp == '/') {
 556                *sp = '\0';
 557                h->path = sp + 1;
 558        } else {
 559                // sp points to '#' or '?'
 560                // Note:
 561                // http://busybox.net?login=john@doe is a valid URL
 562                // (without '/' between ".net" and "?"),
 563                // can't store NUL at sp[-1] - this destroys hostname.
 564                *sp++ = '\0';
 565                h->path = sp;
 566        }
 567
 568        sp = strrchr(h->host, '@');
 569        if (sp != NULL) {
 570                // URL-decode "user:password" string before base64-encoding:
 571                // wget http://test:my%20pass@example.com should send
 572                // Authorization: Basic dGVzdDpteSBwYXNz
 573                // which decodes to "test:my pass".
 574                // Standard wget and curl do this too.
 575                *sp = '\0';
 576                free(h->user);
 577                h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
 578                h->host = sp + 1;
 579        }
 580        /* else: h->user remains NULL, or as set by original request
 581         * before redirect (if we are here after a redirect).
 582         */
 583}
 584
 585static char *get_sanitized_hdr(FILE *fp)
 586{
 587        char *s, *hdrval;
 588        int c;
 589
 590        /* retrieve header line */
 591        c = fgets_trim_sanitize(fp, "  %s\n");
 592
 593        /* end of the headers? */
 594        if (G.wget_buf[0] == '\0')
 595                return NULL;
 596
 597        /* convert the header name to lower case */
 598        for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
 599                /*
 600                 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
 601                 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
 602                 * "A-Z" maps to "a-z".
 603                 * "@[\]" can't occur in header names.
 604                 * "^_" maps to "~,DEL" (which is wrong).
 605                 * "^" was never seen yet, "_" was seen from web.archive.org
 606                 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
 607                 */
 608                *s |= 0x20;
 609        }
 610
 611        /* verify we are at the end of the header name */
 612        if (*s != ':')
 613                bb_error_msg_and_die("bad header line: %s", G.wget_buf);
 614
 615        /* locate the start of the header value */
 616        *s++ = '\0';
 617        hdrval = skip_whitespace(s);
 618
 619        if (c != '\n') {
 620                /* Rats! The buffer isn't big enough to hold the entire header value */
 621                while (c = getc(fp), c != EOF && c != '\n')
 622                        continue;
 623        }
 624
 625        return hdrval;
 626}
 627
 628static void reset_beg_range_to_zero(void)
 629{
 630        bb_error_msg("restart failed");
 631        G.beg_range = 0;
 632        xlseek(G.output_fd, 0, SEEK_SET);
 633        /* Done at the end instead: */
 634        /* ftruncate(G.output_fd, 0); */
 635}
 636
 637#if ENABLE_FEATURE_WGET_OPENSSL
 638static int spawn_https_helper_openssl(const char *host, unsigned port)
 639{
 640        char *allocated = NULL;
 641        char *servername;
 642        int sp[2];
 643        int pid;
 644        IF_FEATURE_WGET_HTTPS(volatile int child_failed = 0;)
 645
 646        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
 647                /* Kernel can have AF_UNIX support disabled */
 648                bb_perror_msg_and_die("socketpair");
 649
 650        if (!strchr(host, ':'))
 651                host = allocated = xasprintf("%s:%u", host, port);
 652        servername = xstrdup(host);
 653        strrchr(servername, ':')[0] = '\0';
 654
 655        fflush_all();
 656        pid = xvfork();
 657        if (pid == 0) {
 658                /* Child */
 659                char *argv[8];
 660
 661                close(sp[0]);
 662                xmove_fd(sp[1], 0);
 663                xdup2(0, 1);
 664                /*
 665                 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
 666                 * It prints some debug stuff on stderr, don't know how to suppress it.
 667                 * Work around by dev-nulling stderr. We lose all error messages :(
 668                 */
 669                xmove_fd(2, 3);
 670                xopen("/dev/null", O_RDWR);
 671                memset(&argv, 0, sizeof(argv));
 672                argv[0] = (char*)"openssl";
 673                argv[1] = (char*)"s_client";
 674                argv[2] = (char*)"-quiet";
 675                argv[3] = (char*)"-connect";
 676                argv[4] = (char*)host;
 677                /*
 678                 * Per RFC 6066 Section 3, the only permitted values in the
 679                 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
 680                 * IPv4 and IPv6 addresses, port numbers are not allowed.
 681                 */
 682                if (!is_ip_address(servername)) {
 683                        argv[5] = (char*)"-servername";
 684                        argv[6] = (char*)servername;
 685                }
 686
 687                BB_EXECVP(argv[0], argv);
 688                xmove_fd(3, 2);
 689# if ENABLE_FEATURE_WGET_HTTPS
 690                child_failed = 1;
 691                xfunc_die();
 692# else
 693                bb_perror_msg_and_die("can't execute '%s'", argv[0]);
 694# endif
 695                /* notreached */
 696        }
 697
 698        /* Parent */
 699        free(servername);
 700        free(allocated);
 701        close(sp[1]);
 702# if ENABLE_FEATURE_WGET_HTTPS
 703        if (child_failed) {
 704                close(sp[0]);
 705                return -1;
 706        }
 707# endif
 708        return sp[0];
 709}
 710#endif
 711
 712#if ENABLE_FEATURE_WGET_HTTPS
 713static void spawn_ssl_client(const char *host, int network_fd, int flags)
 714{
 715        int sp[2];
 716        int pid;
 717        char *servername, *p;
 718
 719        if (!(option_mask32 & WGET_OPT_NO_CHECK_CERT))
 720                bb_error_msg("note: TLS certificate validation not implemented");
 721
 722        servername = xstrdup(host);
 723        p = strrchr(servername, ':');
 724        if (p) *p = '\0';
 725
 726        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
 727                /* Kernel can have AF_UNIX support disabled */
 728                bb_perror_msg_and_die("socketpair");
 729
 730        fflush_all();
 731        pid = BB_MMU ? xfork() : xvfork();
 732        if (pid == 0) {
 733                /* Child */
 734                close(sp[0]);
 735                xmove_fd(sp[1], 0);
 736                xdup2(0, 1);
 737                if (BB_MMU) {
 738                        tls_state_t *tls = new_tls_state();
 739                        tls->ifd = tls->ofd = network_fd;
 740                        tls_handshake(tls, servername);
 741                        tls_run_copy_loop(tls, flags);
 742                        exit(0);
 743                } else {
 744                        char *argv[6];
 745
 746                        xmove_fd(network_fd, 3);
 747                        argv[0] = (char*)"ssl_client";
 748                        argv[1] = (char*)"-s3";
 749                        //TODO: if (!is_ip_address(servername))...
 750                        argv[2] = (char*)"-n";
 751                        argv[3] = servername;
 752                        argv[4] = (flags & TLSLOOP_EXIT_ON_LOCAL_EOF ? (char*)"-e" : NULL);
 753                        argv[5] = NULL;
 754                        BB_EXECVP(argv[0], argv);
 755                        bb_perror_msg_and_die("can't execute '%s'", argv[0]);
 756                }
 757                /* notreached */
 758        }
 759
 760        /* Parent */
 761        free(servername);
 762        close(sp[1]);
 763        xmove_fd(sp[0], network_fd);
 764}
 765#endif
 766
 767static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
 768{
 769        FILE *sfp;
 770        char *pass;
 771        int port;
 772
 773        sfp = open_socket(lsa);
 774#if ENABLE_FEATURE_WGET_HTTPS
 775        if (target->protocol == P_FTPS)
 776                spawn_ssl_client(target->host, fileno(sfp), TLSLOOP_EXIT_ON_LOCAL_EOF);
 777#endif
 778
 779        if (ftpcmd(NULL, NULL, sfp) != 220)
 780                bb_error_msg_and_die("%s", G.wget_buf);
 781                /* note: ftpcmd() sanitizes G.wget_buf, ok to print */
 782
 783        /* Split username:password pair */
 784        pass = (char*)"busybox"; /* password for "anonymous" */
 785        if (target->user) {
 786                pass = strchr(target->user, ':');
 787                if (pass)
 788                        *pass++ = '\0';
 789        }
 790
 791        /* Log in */
 792        switch (ftpcmd("USER ", target->user ?: "anonymous", sfp)) {
 793        case 230:
 794                break;
 795        case 331:
 796                if (ftpcmd("PASS ", pass, sfp) == 230)
 797                        break;
 798                /* fall through (failed login) */
 799        default:
 800                bb_error_msg_and_die("ftp login: %s", G.wget_buf);
 801        }
 802
 803        ftpcmd("TYPE I", NULL, sfp);
 804
 805        /* Query file size */
 806        if (ftpcmd("SIZE ", target->path, sfp) == 213) {
 807                G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
 808                if (G.content_len < 0 || errno) {
 809                        bb_error_msg_and_die("bad SIZE value '%s'", G.wget_buf + 4);
 810                }
 811                G.got_clen = 1;
 812        }
 813
 814        /* Enter passive mode */
 815        if (ENABLE_FEATURE_IPV6 && ftpcmd("EPSV", NULL, sfp) == 229) {
 816                /* good */
 817        } else
 818        if (ftpcmd("PASV", NULL, sfp) != 227) {
 819 pasv_error:
 820                bb_error_msg_and_die("bad response to %s: %s", "PASV", G.wget_buf);
 821        }
 822        port = parse_pasv_epsv(G.wget_buf);
 823        if (port < 0)
 824                goto pasv_error;
 825
 826        set_nport(&lsa->u.sa, htons(port));
 827
 828        *dfpp = open_socket(lsa);
 829
 830#if ENABLE_FEATURE_WGET_HTTPS
 831        if (target->protocol == P_FTPS) {
 832                /* "PROT P" enables encryption of data stream.
 833                 * Without it (or with "PROT C"), data is sent unencrypted.
 834                 */
 835                if (ftpcmd("PROT P", NULL, sfp) == 200)
 836                        spawn_ssl_client(target->host, fileno(*dfpp), /*flags*/ 0);
 837        }
 838#endif
 839
 840        if (G.beg_range != 0) {
 841                sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
 842                if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
 843                        G.content_len -= G.beg_range;
 844                else
 845                        reset_beg_range_to_zero();
 846        }
 847
 848//TODO: needs ftp-escaping 0xff and '\n' bytes here.
 849//Or disallow '\n' altogether via sanitize_string() in parse_url().
 850//But 0xff's are possible in valid utf8 filenames.
 851        if (ftpcmd("RETR ", target->path, sfp) > 150)
 852                bb_error_msg_and_die("bad response to %s: %s", "RETR", G.wget_buf);
 853
 854        return sfp;
 855}
 856
 857static void NOINLINE retrieve_file_data(FILE *dfp)
 858{
 859#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 860# if ENABLE_FEATURE_WGET_TIMEOUT
 861        unsigned second_cnt = G.timeout_seconds;
 862# endif
 863        struct pollfd polldata;
 864
 865        polldata.fd = fileno(dfp);
 866        polldata.events = POLLIN | POLLPRI;
 867#endif
 868        progress_meter(PROGRESS_START);
 869
 870        if (G.chunked)
 871                goto get_clen;
 872
 873        /* Loops only if chunked */
 874        while (1) {
 875
 876#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 877                /* Must use nonblocking I/O, otherwise fread will loop
 878                 * and *block* until it reads full buffer,
 879                 * which messes up progress bar and/or timeout logic.
 880                 * Because of nonblocking I/O, we need to dance
 881                 * very carefully around EAGAIN. See explanation at
 882                 * clearerr() calls.
 883                 */
 884                ndelay_on(polldata.fd);
 885#endif
 886                while (1) {
 887                        int n;
 888                        unsigned rdsz;
 889
 890#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 891                        /* fread internally uses read loop, which in our case
 892                         * is usually exited when we get EAGAIN.
 893                         * In this case, libc sets error marker on the stream.
 894                         * Need to clear it before next fread to avoid possible
 895                         * rare false positive ferror below. Rare because usually
 896                         * fread gets more than zero bytes, and we don't fall
 897                         * into if (n <= 0) ...
 898                         */
 899                        clearerr(dfp);
 900#endif
 901                        errno = 0;
 902                        rdsz = sizeof(G.wget_buf);
 903                        if (G.got_clen) {
 904                                if (G.content_len < (off_t)sizeof(G.wget_buf)) {
 905                                        if ((int)G.content_len <= 0)
 906                                                break;
 907                                        rdsz = (unsigned)G.content_len;
 908                                }
 909                        }
 910                        n = fread(G.wget_buf, 1, rdsz, dfp);
 911
 912                        if (n > 0) {
 913                                xwrite(G.output_fd, G.wget_buf, n);
 914#if ENABLE_FEATURE_WGET_STATUSBAR
 915                                G.transferred += n;
 916#endif
 917                                if (G.got_clen) {
 918                                        G.content_len -= n;
 919                                        if (G.content_len == 0)
 920                                                break;
 921                                }
 922#if ENABLE_FEATURE_WGET_TIMEOUT
 923                                second_cnt = G.timeout_seconds;
 924#endif
 925                                goto bump;
 926                        }
 927
 928                        /* n <= 0.
 929                         * man fread:
 930                         * If error occurs, or EOF is reached, the return value
 931                         * is a short item count (or zero).
 932                         * fread does not distinguish between EOF and error.
 933                         */
 934                        if (errno != EAGAIN) {
 935                                if (ferror(dfp)) {
 936                                        progress_meter(PROGRESS_END);
 937                                        bb_perror_msg_and_die(bb_msg_read_error);
 938                                }
 939                                break; /* EOF, not error */
 940                        }
 941
 942#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 943                        /* It was EAGAIN. There is no data. Wait up to one second
 944                         * then abort if timed out, or update the bar and try reading again.
 945                         */
 946                        if (safe_poll(&polldata, 1, 1000) == 0) {
 947# if ENABLE_FEATURE_WGET_TIMEOUT
 948                                if (second_cnt != 0 && --second_cnt == 0) {
 949                                        progress_meter(PROGRESS_END);
 950                                        bb_error_msg_and_die("download timed out");
 951                                }
 952# endif
 953                                /* We used to loop back to poll here,
 954                                 * but there is no great harm in letting fread
 955                                 * to try reading anyway.
 956                                 */
 957                        }
 958#endif
 959 bump:
 960                        /* Need to do it _every_ second for "stalled" indicator
 961                         * to be shown properly.
 962                         */
 963                        progress_meter(PROGRESS_BUMP);
 964                } /* while (reading data) */
 965
 966#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 967                clearerr(dfp);
 968                ndelay_off(polldata.fd); /* else fgets can get very unhappy */
 969#endif
 970                if (!G.chunked)
 971                        break;
 972
 973                /* Each chunk ends with "\r\n" - eat it */
 974                fgets_trim_sanitize(dfp, NULL);
 975 get_clen:
 976                /* chunk size format is "HEXNUM[;name[=val]]\r\n" */
 977                fgets_trim_sanitize(dfp, NULL);
 978                errno = 0;
 979                G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
 980                /*
 981                 * Had a bug with inputs like "ffffffff0001f400"
 982                 * smashing the heap later. Ensure >= 0.
 983                 */
 984                if (G.content_len < 0 || errno)
 985                        bb_error_msg_and_die("bad chunk length '%s'", G.wget_buf);
 986                if (G.content_len == 0)
 987                        break; /* all done! */
 988                G.got_clen = 1;
 989                /*
 990                 * Note that fgets may result in some data being buffered in dfp.
 991                 * We loop back to fread, which will retrieve this data.
 992                 * Also note that code has to be arranged so that fread
 993                 * is done _before_ one-second poll wait - poll doesn't know
 994                 * about stdio buffering and can result in spurious one second waits!
 995                 */
 996        }
 997
 998        /* If -c failed, we restart from the beginning,
 999         * but we do not truncate file then, we do it only now, at the end.
1000         * This lets user to ^C if his 99% complete 10 GB file download
1001         * failed to restart *without* losing the almost complete file.
1002         */
1003        {
1004                off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
1005                if (pos != (off_t)-1)
1006                        ftruncate(G.output_fd, pos);
1007        }
1008
1009        /* Draw full bar and free its resources */
1010        G.chunked = 0;  /* makes it show 100% even for chunked download */
1011        G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
1012        progress_meter(PROGRESS_END);
1013}
1014
1015static void download_one_url(const char *url)
1016{
1017        bool use_proxy;                 /* Use proxies if env vars are set  */
1018        int redir_limit;
1019        len_and_sockaddr *lsa;
1020        FILE *sfp;                      /* socket to web/ftp server         */
1021        FILE *dfp;                      /* socket to ftp server (data)      */
1022        char *fname_out_alloc;
1023        char *redirected_path = NULL;
1024        struct host_info server;
1025        struct host_info target;
1026
1027        server.allocated = NULL;
1028        target.allocated = NULL;
1029        server.user = NULL;
1030        target.user = NULL;
1031
1032        parse_url(url, &target);
1033
1034        /* Use the proxy if necessary */
1035        use_proxy = (strcmp(G.proxy_flag, "off") != 0);
1036        if (use_proxy) {
1037                char *proxy = getenv(target.protocol[0] == 'f' ? "ftp_proxy" : "http_proxy");
1038//FIXME: what if protocol is https? Ok to use http_proxy?
1039                use_proxy = (proxy && proxy[0]);
1040                if (use_proxy)
1041                        parse_url(proxy, &server);
1042        }
1043        if (!use_proxy) {
1044                server.protocol = target.protocol;
1045                server.port = target.port;
1046                if (ENABLE_FEATURE_IPV6) {
1047                        //free(server.allocated); - can't be non-NULL
1048                        server.host = server.allocated = xstrdup(target.host);
1049                } else {
1050                        server.host = target.host;
1051                }
1052        }
1053
1054        if (ENABLE_FEATURE_IPV6)
1055                strip_ipv6_scope_id(target.host);
1056
1057        /* If there was no -O FILE, guess output filename */
1058        fname_out_alloc = NULL;
1059        if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1060                G.fname_out = bb_get_last_path_component_nostrip(target.path);
1061                /* handle "wget http://kernel.org//" */
1062                if (G.fname_out[0] == '/' || !G.fname_out[0])
1063                        G.fname_out = (char*)"index.html";
1064                /* -P DIR is considered only if there was no -O FILE */
1065                if (G.dir_prefix)
1066                        G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
1067                else {
1068                        /* redirects may free target.path later, need to make a copy */
1069                        G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
1070                }
1071        }
1072#if ENABLE_FEATURE_WGET_STATUSBAR
1073        G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
1074#endif
1075
1076        /* Determine where to start transfer */
1077        G.beg_range = 0;
1078        if (option_mask32 & WGET_OPT_CONTINUE) {
1079                G.output_fd = open(G.fname_out, O_WRONLY);
1080                if (G.output_fd >= 0) {
1081                        G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
1082                }
1083                /* File doesn't exist. We do not create file here yet.
1084                 * We are not sure it exists on remote side */
1085        }
1086
1087        redir_limit = 5;
1088 resolve_lsa:
1089        lsa = xhost2sockaddr(server.host, server.port);
1090        if (!(option_mask32 & WGET_OPT_QUIET)) {
1091                char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
1092                fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
1093                free(s);
1094        }
1095 establish_session:
1096        /*G.content_len = 0; - redundant, got_clen = 0 is enough */
1097        G.got_clen = 0;
1098        G.chunked = 0;
1099        if (use_proxy || target.protocol[0] != 'f' /*not ftp[s]*/) {
1100                /*
1101                 *  HTTP session
1102                 */
1103                char *str;
1104                int status;
1105
1106                /* Open socket to http(s) server */
1107#if ENABLE_FEATURE_WGET_OPENSSL
1108                /* openssl (and maybe internal TLS) support is configured */
1109                if (server.protocol == P_HTTPS) {
1110                        /* openssl-based helper
1111                         * Inconvenient API since we can't give it an open fd
1112                         */
1113                        int fd = spawn_https_helper_openssl(server.host, server.port);
1114# if ENABLE_FEATURE_WGET_HTTPS
1115                        if (fd < 0) { /* no openssl? try internal */
1116                                sfp = open_socket(lsa);
1117                                spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
1118                                goto socket_opened;
1119                        }
1120# else
1121                        /* We don't check for exec("openssl") failure in this case */
1122# endif
1123                        sfp = fdopen(fd, "r+");
1124                        if (!sfp)
1125                                bb_die_memory_exhausted();
1126                        goto socket_opened;
1127                }
1128                sfp = open_socket(lsa);
1129 socket_opened:
1130#elif ENABLE_FEATURE_WGET_HTTPS
1131                /* Only internal TLS support is configured */
1132                sfp = open_socket(lsa);
1133                if (server.protocol == P_HTTPS)
1134                        spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
1135#else
1136                /* ssl (https) support is not configured */
1137                sfp = open_socket(lsa);
1138#endif
1139                /* Send HTTP request */
1140                if (use_proxy) {
1141                        SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
1142                                target.protocol, target.host,
1143                                target.path);
1144                } else {
1145                        SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
1146                                (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
1147                                target.path);
1148                }
1149                if (!USR_HEADER_HOST)
1150                        SENDFMT(sfp, "Host: %s\r\n", target.host);
1151                if (!USR_HEADER_USER_AGENT)
1152                        SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
1153
1154                /* Ask server to close the connection as soon as we are done
1155                 * (IOW: we do not intend to send more requests)
1156                 */
1157                SENDFMT(sfp, "Connection: close\r\n");
1158
1159#if ENABLE_FEATURE_WGET_AUTHENTICATION
1160                if (target.user && !USR_HEADER_AUTH) {
1161                        SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
1162                                base64enc(target.user));
1163                }
1164                if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1165                        SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
1166                                base64enc(server.user));
1167                }
1168#endif
1169
1170                if (G.beg_range != 0 && !USR_HEADER_RANGE)
1171                        SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
1172
1173#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1174                if (G.extra_headers) {
1175                        log_io(G.extra_headers);
1176                        fputs(G.extra_headers, sfp);
1177                }
1178
1179                if (option_mask32 & WGET_OPT_POST_DATA) {
1180                        SENDFMT(sfp,
1181                                "Content-Type: application/x-www-form-urlencoded\r\n"
1182                                "Content-Length: %u\r\n"
1183                                "\r\n"
1184                                "%s",
1185                                (int) strlen(G.post_data), G.post_data
1186                        );
1187                } else
1188#endif
1189                {
1190                        SENDFMT(sfp, "\r\n");
1191                }
1192
1193                fflush(sfp);
1194
1195/* Tried doing this unconditionally.
1196 * Cloudflare and nginx/1.11.5 are shocked to see SHUT_WR on non-HTTPS.
1197 */
1198#if SSL_SUPPORTED
1199                if (target.protocol == P_HTTPS) {
1200                        /* If we use SSL helper, keeping our end of the socket open for writing
1201                         * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1202                         * even after child closes its copy of the fd.
1203                         * This helps:
1204                         */
1205                        shutdown(fileno(sfp), SHUT_WR);
1206                }
1207#endif
1208
1209                /*
1210                 * Retrieve HTTP response line and check for "200" status code.
1211                 */
1212 read_response:
1213                fgets_trim_sanitize(sfp, "  %s\n");
1214
1215                str = G.wget_buf;
1216                str = skip_non_whitespace(str);
1217                str = skip_whitespace(str);
1218                // FIXME: no error check
1219                // xatou wouldn't work: "200 OK"
1220                status = atoi(str);
1221                switch (status) {
1222                case 0:
1223                case 100:
1224                        while (get_sanitized_hdr(sfp) != NULL)
1225                                /* eat all remaining headers */;
1226                        goto read_response;
1227
1228                /* Success responses */
1229                case 200:
1230                        /* fall through */
1231                case 201: /* 201 Created */
1232/* "The request has been fulfilled and resulted in a new resource being created" */
1233                        /* Standard wget is reported to treat this as success */
1234                        /* fall through */
1235                case 202: /* 202 Accepted */
1236/* "The request has been accepted for processing, but the processing has not been completed" */
1237                        /* Treat as success: fall through */
1238                case 203: /* 203 Non-Authoritative Information */
1239/* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1240                        /* fall through */
1241                case 204: /* 204 No Content */
1242/*
1243Response 204 doesn't say "null file", it says "metadata
1244has changed but data didn't":
1245
1246"10.2.5 204 No Content
1247The server has fulfilled the request but does not need to return
1248an entity-body, and might want to return updated metainformation.
1249The response MAY include new or updated metainformation in the form
1250of entity-headers, which if present SHOULD be associated with
1251the requested variant.
1252
1253If the client is a user agent, it SHOULD NOT change its document
1254view from that which caused the request to be sent. This response
1255is primarily intended to allow input for actions to take place
1256without causing a change to the user agent's active document view,
1257although any new or updated metainformation SHOULD be applied
1258to the document currently in the user agent's active view.
1259
1260The 204 response MUST NOT include a message-body, and thus
1261is always terminated by the first empty line after the header fields."
1262
1263However, in real world it was observed that some web servers
1264(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1265*/
1266                        if (G.beg_range != 0) {
1267                                /* "Range:..." was not honored by the server.
1268                                 * Restart download from the beginning.
1269                                 */
1270                                reset_beg_range_to_zero();
1271                        }
1272                        break;
1273                /* 205 Reset Content ?? what to do on this ??   */
1274
1275                case 300:  /* redirection */
1276                case 301:
1277                case 302:
1278                case 303:
1279                        break;
1280
1281                case 206: /* Partial Content */
1282                        if (G.beg_range != 0)
1283                                /* "Range:..." worked. Good. */
1284                                break;
1285                        /* Partial Content even though we did not ask for it??? */
1286                        /* fall through */
1287                default:
1288                        bb_error_msg_and_die("server returned error: %s", G.wget_buf);
1289                }
1290
1291                /*
1292                 * Retrieve HTTP headers.
1293                 */
1294                while ((str = get_sanitized_hdr(sfp)) != NULL) {
1295                        static const char keywords[] ALIGN1 =
1296                                "content-length\0""transfer-encoding\0""location\0";
1297                        enum {
1298                                KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1299                        };
1300                        smalluint key;
1301
1302                        /* get_sanitized_hdr converted "FOO:" string to lowercase */
1303
1304                        /* strip trailing whitespace */
1305                        char *s = strchrnul(str, '\0') - 1;
1306                        while (s >= str && (*s == ' ' || *s == '\t')) {
1307                                *s = '\0';
1308                                s--;
1309                        }
1310                        key = index_in_strings(keywords, G.wget_buf) + 1;
1311                        if (key == KEY_content_length) {
1312                                G.content_len = BB_STRTOOFF(str, NULL, 10);
1313                                if (G.content_len < 0 || errno) {
1314                                        bb_error_msg_and_die("content-length %s is garbage", str);
1315                                }
1316                                G.got_clen = 1;
1317                                continue;
1318                        }
1319                        if (key == KEY_transfer_encoding) {
1320                                if (strcmp(str_tolower(str), "chunked") != 0)
1321                                        bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
1322                                G.chunked = 1;
1323                        }
1324                        if (key == KEY_location && status >= 300) {
1325                                if (--redir_limit == 0)
1326                                        bb_error_msg_and_die("too many redirections");
1327                                fclose(sfp);
1328                                if (str[0] == '/') {
1329                                        free(redirected_path);
1330                                        target.path = redirected_path = xstrdup(str + 1);
1331                                        /* lsa stays the same: it's on the same server */
1332                                } else {
1333                                        parse_url(str, &target);
1334                                        if (!use_proxy) {
1335                                                /* server.user remains untouched */
1336                                                free(server.allocated);
1337                                                server.allocated = NULL;
1338                                                server.protocol = target.protocol;
1339                                                server.host = target.host;
1340                                                /* strip_ipv6_scope_id(target.host); - no! */
1341                                                /* we assume remote never gives us IPv6 addr with scope id */
1342                                                server.port = target.port;
1343                                                free(lsa);
1344                                                goto resolve_lsa;
1345                                        } /* else: lsa stays the same: we use proxy */
1346                                }
1347                                goto establish_session;
1348                        }
1349                }
1350//              if (status >= 300)
1351//                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
1352
1353                /* For HTTP, data is pumped over the same connection */
1354                dfp = sfp;
1355        } else {
1356                /*
1357                 *  FTP session
1358                 */
1359                sfp = prepare_ftp_session(&dfp, &target, lsa);
1360        }
1361
1362        free(lsa);
1363
1364        if (!(option_mask32 & WGET_OPT_SPIDER)) {
1365                if (G.output_fd < 0)
1366                        G.output_fd = xopen(G.fname_out, G.o_flags);
1367                retrieve_file_data(dfp);
1368                if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1369                        xclose(G.output_fd);
1370                        G.output_fd = -1;
1371                }
1372        }
1373
1374        if (dfp != sfp) {
1375                /* It's ftp. Close data connection properly */
1376                fclose(dfp);
1377                if (ftpcmd(NULL, NULL, sfp) != 226)
1378                        bb_error_msg_and_die("ftp error: %s", G.wget_buf);
1379                /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1380        }
1381        fclose(sfp);
1382
1383        free(server.allocated);
1384        free(target.allocated);
1385        free(server.user);
1386        free(target.user);
1387        free(fname_out_alloc);
1388        free(redirected_path);
1389}
1390
1391int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1392int wget_main(int argc UNUSED_PARAM, char **argv)
1393{
1394#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1395        static const char wget_longopts[] ALIGN1 =
1396                /* name, has_arg, val */
1397                "continue\0"         No_argument       "c"
1398                "quiet\0"            No_argument       "q"
1399                "server-response\0"  No_argument       "S"
1400                "output-document\0"  Required_argument "O"
1401                "directory-prefix\0" Required_argument "P"
1402                "proxy\0"            Required_argument "Y"
1403                "user-agent\0"       Required_argument "U"
1404IF_FEATURE_WGET_TIMEOUT(
1405                "timeout\0"          Required_argument "T")
1406                /* Ignored: */
1407IF_DESKTOP(     "tries\0"            Required_argument "t")
1408                "header\0"           Required_argument "\xff"
1409                "post-data\0"        Required_argument "\xfe"
1410                "spider\0"           No_argument       "\xfd"
1411                "no-check-certificate\0" No_argument   "\xfc"
1412                /* Ignored (we always use PASV): */
1413IF_DESKTOP(     "passive-ftp\0"      No_argument       "\xf0")
1414                /* Ignored (we don't support caching) */
1415IF_DESKTOP(     "no-cache\0"         No_argument       "\xf0")
1416IF_DESKTOP(     "no-verbose\0"       No_argument       "\xf0")
1417IF_DESKTOP(     "no-clobber\0"       No_argument       "\xf0")
1418IF_DESKTOP(     "no-host-directories\0" No_argument    "\xf0")
1419IF_DESKTOP(     "no-parent\0"        No_argument       "\xf0")
1420                ;
1421# define GETOPT32 getopt32long
1422# define LONGOPTS ,wget_longopts
1423#else
1424# define GETOPT32 getopt32
1425# define LONGOPTS
1426#endif
1427
1428#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1429        llist_t *headers_llist = NULL;
1430#endif
1431
1432        INIT_G();
1433
1434#if ENABLE_FEATURE_WGET_TIMEOUT
1435        G.timeout_seconds = 900;
1436        signal(SIGALRM, alarm_handler);
1437#endif
1438        G.proxy_flag = "on";   /* use proxies if env vars are set */
1439        G.user_agent = "Wget"; /* "User-Agent" header field */
1440
1441#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1442#endif
1443        GETOPT32(argv, "^"
1444                "cqSO:P:Y:U:T:+"
1445                /*ignored:*/ "t:"
1446                /*ignored:*/ "n::"
1447                /* wget has exactly four -n<letter> opts, all of which we can ignore:
1448                 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1449                 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1450                 * -nH --no-host-directories: wget -r http://host/ won't create host/
1451                 * -np --no-parent
1452                 * "n::" above says that we accept -n[ARG].
1453                 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1454                 */
1455                "\0"
1456                "-1" /* at least one URL */
1457                IF_FEATURE_WGET_LONG_OPTIONS(":\xff::") /* --header is a list */
1458                LONGOPTS
1459                , &G.fname_out, &G.dir_prefix,
1460                &G.proxy_flag, &G.user_agent,
1461                IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1462                NULL, /* -t RETRIES */
1463                NULL  /* -n[ARG] */
1464                IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1465                IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1466        );
1467#if 0 /* option bits debug */
1468        if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1469        if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1470        if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1471        if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1472        if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
1473        if (option_mask32 & WGET_OPT_NO_CHECK_CERT) bb_error_msg("--no-check-certificate");
1474        exit(0);
1475#endif
1476        argv += optind;
1477
1478#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1479        if (headers_llist) {
1480                int size = 0;
1481                char *hdr;
1482                llist_t *ll = headers_llist;
1483                while (ll) {
1484                        size += strlen(ll->data) + 2;
1485                        ll = ll->link;
1486                }
1487                G.extra_headers = hdr = xmalloc(size + 1);
1488                while (headers_llist) {
1489                        int bit;
1490                        const char *words;
1491
1492                        size = sprintf(hdr, "%s\r\n",
1493                                        (char*)llist_pop(&headers_llist));
1494                        /* a bit like index_in_substrings but don't match full key */
1495                        bit = 1;
1496                        words = wget_user_headers;
1497                        while (*words) {
1498                                if (strstr(hdr, words) == hdr) {
1499                                        G.user_headers |= bit;
1500                                        break;
1501                                }
1502                                bit <<= 1;
1503                                words += strlen(words) + 1;
1504                        }
1505                        hdr += size;
1506                }
1507        }
1508#endif
1509
1510        G.output_fd = -1;
1511        G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1512        if (G.fname_out) { /* -O FILE ? */
1513                if (LONE_DASH(G.fname_out)) { /* -O - ? */
1514                        G.output_fd = 1;
1515                        option_mask32 &= ~WGET_OPT_CONTINUE;
1516                }
1517                /* compat with wget: -O FILE can overwrite */
1518                G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1519        }
1520
1521        while (*argv)
1522                download_one_url(*argv++);
1523
1524        if (G.output_fd >= 0)
1525                xclose(G.output_fd);
1526
1527#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1528        free(G.extra_headers);
1529#endif
1530        FINI_G();
1531
1532        return EXIT_SUCCESS;
1533}
1534