busybox/networking/wget.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * wget - retrieve a file using HTTP or FTP
   4 *
   5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
   6 * Licensed under GPLv2, see file LICENSE in this source tree.
   7 *
   8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
   9 * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
  10 */
  11//config:config WGET
  12//config:       bool "wget (38 kb)"
  13//config:       default y
  14//config:       help
  15//config:       wget is a utility for non-interactive download of files from HTTP
  16//config:       and FTP servers.
  17//config:
  18//config:config FEATURE_WGET_LONG_OPTIONS
  19//config:       bool "Enable long options"
  20//config:       default y
  21//config:       depends on WGET && LONG_OPTS
  22//config:
  23//config:config FEATURE_WGET_STATUSBAR
  24//config:       bool "Enable progress bar (+2k)"
  25//config:       default y
  26//config:       depends on WGET
  27//config:
  28//config:config FEATURE_WGET_FTP
  29//config:       bool "Enable FTP protocol (+1k)"
  30//config:       default y
  31//config:       depends on WGET
  32//config:       help
  33//config:       To support FTPS, enable FEATURE_WGET_HTTPS as well.
  34//config:
  35//config:config FEATURE_WGET_AUTHENTICATION
  36//config:       bool "Enable HTTP authentication"
  37//config:       default y
  38//config:       depends on WGET
  39//config:       help
  40//config:       Support authenticated HTTP transfers.
  41//config:
  42//config:config FEATURE_WGET_TIMEOUT
  43//config:       bool "Enable timeout option -T SEC"
  44//config:       default y
  45//config:       depends on WGET
  46//config:       help
  47//config:       Supports network read and connect timeouts for wget,
  48//config:       so that wget will give up and timeout, through the -T
  49//config:       command line option.
  50//config:
  51//config:       Currently only connect and network data read timeout are
  52//config:       supported (i.e., timeout is not applied to the DNS query). When
  53//config:       FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
  54//config:       will work in addition to -T.
  55//config:
  56//config:config FEATURE_WGET_HTTPS
  57//config:       bool "Support HTTPS using internal TLS code"
  58//config:       default y
  59//config:       depends on WGET
  60//config:       select TLS
  61//config:       help
  62//config:       wget will use internal TLS code to connect to https:// URLs.
  63//config:       It also enables FTPS support, but it's not well tested yet.
  64//config:       Note:
  65//config:       On NOMMU machines, ssl_helper applet should be available
  66//config:       in the $PATH for this to work. Make sure to select that applet.
  67//config:
  68//config:       Note: currently, TLS code only makes TLS I/O work, it
  69//config:       does *not* check that the peer is who it claims to be, etc.
  70//config:       IOW: it uses peer-supplied public keys to establish encryption
  71//config:       and signing keys, then encrypts and signs outgoing data and
  72//config:       decrypts incoming data.
  73//config:       It does not check signature hashes on the incoming data:
  74//config:       this means that attackers manipulating TCP packets can
  75//config:       send altered data and we unknowingly receive garbage.
  76//config:       (This check might be relatively easy to add).
  77//config:       It does not check public key's certificate:
  78//config:       this means that the peer may be an attacker impersonating
  79//config:       the server we think we are talking to.
  80//config:
  81//config:       If you think this is unacceptable, consider this. As more and more
  82//config:       servers switch to HTTPS-only operation, without such "crippled"
  83//config:       TLS code it is *impossible* to simply download a kernel source
  84//config:       from kernel.org. Which can in real world translate into
  85//config:       "my small automatic tooling to build cross-compilers from sources
  86//config:       no longer works, I need to additionally keep a local copy
  87//config:       of ~4 megabyte source tarball of a SSL library and ~2 megabyte
  88//config:       source of wget, need to compile and build both before I can
  89//config:       download anything. All this despite the fact that the build
  90//config:       is done in a QEMU sandbox on a machine with absolutely nothing
  91//config:       worth stealing, so I don't care if someone would go to a lot
  92//config:       of trouble to intercept my HTTPS download to send me an altered
  93//config:       kernel tarball".
  94//config:
  95//config:       If you still think this is unacceptable, send patches.
  96//config:
  97//config:       If you still think this is unacceptable, do not want to send
  98//config:       patches, but do want to waste bandwidth explaining how wrong
  99//config:       it is, you will be ignored.
 100//config:
 101//config:       FEATURE_WGET_OPENSSL does implement TLS verification
 102//config:       using the certificates available to OpenSSL.
 103//config:
 104//config:config FEATURE_WGET_OPENSSL
 105//config:       bool "Try to connect to HTTPS using openssl"
 106//config:       default y
 107//config:       depends on WGET
 108//config:       help
 109//config:       Try to use openssl to handle HTTPS.
 110//config:
 111//config:       OpenSSL has a simple SSL client for debug purposes.
 112//config:       If you select this option, wget will effectively run:
 113//config:       "openssl s_client -quiet -connect hostname:443
 114//config:       -servername hostname 2>/dev/null" and pipe its data
 115//config:       through it. -servername is not used if hostname is numeric.
 116//config:       Note inconvenient API: host resolution is done twice,
 117//config:       and there is no guarantee openssl's idea of IPv6 address
 118//config:       format is the same as ours.
 119//config:       Another problem is that s_client prints debug information
 120//config:       to stderr, and it needs to be suppressed. This means
 121//config:       all error messages get suppressed too.
 122//config:       openssl is also a big binary, often dynamically linked
 123//config:       against ~15 libraries.
 124//config:
 125//config:       If openssl can't be executed, internal TLS code will be used
 126//config:       (if you enabled it); if openssl can be executed but fails later,
 127//config:       wget can't detect this, and download will fail.
 128//config:
 129//config:       By default TLS verification is performed, unless
 130//config:       --no-check-certificate option is passed.
 131
 132//applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
 133
 134//kbuild:lib-$(CONFIG_WGET) += wget.o
 135
 136//usage:#define wget_trivial_usage
 137//usage:        IF_FEATURE_WGET_LONG_OPTIONS(
 138//usage:       "[-cqS] [--spider] [-O FILE] [-o LOGFILE] [--header STR]\n"
 139//usage:       "        [--post-data STR | --post-file FILE] [-Y on/off]\n"
 140/* Since we ignore these opts, we don't show them in --help */
 141/* //usage:    "        [--no-cache] [--passive-ftp] [-t TRIES]" */
 142/* //usage:    "        [-nv] [-nc] [-nH] [-np]" */
 143//usage:       "        "IF_FEATURE_WGET_OPENSSL("[--no-check-certificate] ")"[-P DIR] [-U AGENT]"IF_FEATURE_WGET_TIMEOUT(" [-T SEC]")" URL..."
 144//usage:        )
 145//usage:        IF_NOT_FEATURE_WGET_LONG_OPTIONS(
 146//usage:       "[-cqS] [-O FILE] [-o LOGFILE] [-Y on/off] [-P DIR] [-U AGENT]"IF_FEATURE_WGET_TIMEOUT(" [-T SEC]")" URL..."
 147//usage:        )
 148//usage:#define wget_full_usage "\n\n"
 149//usage:       "Retrieve files via HTTP or FTP\n"
 150//usage:        IF_FEATURE_WGET_LONG_OPTIONS(
 151//usage:     "\n        --spider        Only check URL existence: $? is 0 if exists"
 152//usage:     "\n        --header STR    Add STR (of form 'header: value') to headers"
 153//usage:     "\n        --post-data STR Send STR using POST method"
 154//usage:     "\n        --post-file FILE        Send FILE using POST method"
 155//usage:        IF_FEATURE_WGET_OPENSSL(
 156//usage:     "\n        --no-check-certificate  Don't validate the server's certificate"
 157//usage:        )
 158//usage:        )
 159//usage:     "\n        -c              Continue retrieval of aborted transfer"
 160//usage:     "\n        -q              Quiet"
 161//usage:     "\n        -P DIR          Save to DIR (default .)"
 162//usage:     "\n        -S              Show server response"
 163//usage:        IF_FEATURE_WGET_TIMEOUT(
 164//usage:     "\n        -T SEC          Network read timeout is SEC seconds"
 165//usage:        )
 166//usage:     "\n        -O FILE         Save to FILE ('-' for stdout)"
 167//usage:     "\n        -o LOGFILE      Log messages to FILE"
 168//usage:     "\n        -U STR          Use STR for User-Agent header"
 169//usage:     "\n        -Y on/off       Use proxy"
 170
 171#include "libbb.h"
 172
 173#if 0
 174# define log_io(...) bb_error_msg(__VA_ARGS__)
 175# define SENDFMT(fp, fmt, ...) \
 176        do { \
 177                log_io("> " fmt, ##__VA_ARGS__); \
 178                fprintf(fp, fmt, ##__VA_ARGS__); \
 179        } while (0);
 180#else
 181# define log_io(...) ((void)0)
 182# define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
 183#endif
 184
 185
 186#define SSL_SUPPORTED (ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_HTTPS)
 187#define FTPS_SUPPORTED (ENABLE_FEATURE_WGET_FTP && ENABLE_FEATURE_WGET_HTTPS)
 188
 189struct host_info {
 190        char *allocated;
 191        const char *path;
 192        char       *user;
 193        const char *protocol;
 194        char       *host;
 195        int         port;
 196};
 197static const char P_HTTP[] ALIGN1 = "http";
 198#if SSL_SUPPORTED
 199static const char P_HTTPS[] ALIGN1 = "https";
 200#endif
 201#if ENABLE_FEATURE_WGET_FTP
 202static const char P_FTP[] ALIGN1 = "ftp";
 203#endif
 204#if FTPS_SUPPORTED
 205static const char P_FTPS[] ALIGN1 = "ftps";
 206#endif
 207
 208#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 209/* User-specified headers prevent using our corresponding built-in headers.  */
 210enum {
 211        HDR_HOST          = (1<<0),
 212        HDR_USER_AGENT    = (1<<1),
 213        HDR_RANGE         = (1<<2),
 214        HDR_CONTENT_TYPE  = (1<<3),
 215        HDR_AUTH          = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
 216        HDR_PROXY_AUTH    = (1<<5) * ENABLE_FEATURE_WGET_AUTHENTICATION,
 217};
 218static const char wget_user_headers[] ALIGN1 =
 219        "Host:\0"
 220        "User-Agent:\0"
 221        "Range:\0"
 222        "Content-Type:\0"
 223# if ENABLE_FEATURE_WGET_AUTHENTICATION
 224        "Authorization:\0"
 225        "Proxy-Authorization:\0"
 226# endif
 227        ;
 228# define USR_HEADER_HOST         (G.user_headers & HDR_HOST)
 229# define USR_HEADER_USER_AGENT   (G.user_headers & HDR_USER_AGENT)
 230# define USR_HEADER_RANGE        (G.user_headers & HDR_RANGE)
 231# define USR_HEADER_CONTENT_TYPE (G.user_headers & HDR_CONTENT_TYPE)
 232# define USR_HEADER_AUTH         (G.user_headers & HDR_AUTH)
 233# define USR_HEADER_PROXY_AUTH   (G.user_headers & HDR_PROXY_AUTH)
 234#else /* No long options, no user-headers :( */
 235# define USR_HEADER_HOST         0
 236# define USR_HEADER_USER_AGENT   0
 237# define USR_HEADER_RANGE        0
 238# define USR_HEADER_CONTENT_TYPE 0
 239# define USR_HEADER_AUTH         0
 240# define USR_HEADER_PROXY_AUTH   0
 241#endif
 242
 243/* Globals */
 244struct globals {
 245        off_t content_len;        /* Content-length of the file */
 246        off_t beg_range;          /* Range at which continue begins */
 247#if ENABLE_FEATURE_WGET_STATUSBAR
 248        off_t transferred;        /* Number of bytes transferred so far */
 249        const char *curfile;      /* Name of current file being transferred */
 250        bb_progress_t pmt;
 251#endif
 252        char *dir_prefix;
 253#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 254        char *post_data;
 255        char *post_file;
 256        char *extra_headers;
 257        unsigned char user_headers; /* Headers mentioned by the user */
 258#endif
 259        char *fname_out;        /* where to direct output (-O) */
 260        char *fname_log;        /* where to direct log (-o) */
 261        const char *proxy_flag; /* Use proxies if env vars are set */
 262        const char *user_agent; /* "User-Agent" header field */
 263        int output_fd;
 264        int log_fd;
 265        int o_flags;
 266#if ENABLE_FEATURE_WGET_TIMEOUT
 267        unsigned timeout_seconds;
 268        smallint die_if_timed_out;
 269#endif
 270        smallint chunked;         /* chunked transfer encoding */
 271        smallint got_clen;        /* got content-length: from server  */
 272        /* Local downloads do benefit from big buffer.
 273         * With 512 byte buffer, it was measured to be
 274         * an order of magnitude slower than with big one.
 275         */
 276        char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024] ALIGNED(16);
 277} FIX_ALIASING;
 278#define G (*ptr_to_globals)
 279#define INIT_G() do { \
 280        SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
 281} while (0)
 282#define FINI_G() do { \
 283        FREE_PTR_TO_GLOBALS(); \
 284} while (0)
 285
 286
 287/* Must match option string! */
 288enum {
 289        WGET_OPT_CONTINUE   = (1 << 0),
 290        WGET_OPT_QUIET      = (1 << 1),
 291        WGET_OPT_SERVER_RESPONSE = (1 << 2),
 292        WGET_OPT_OUTNAME    = (1 << 3),
 293        WGET_OPT_LOGNAME    = (1 << 4),
 294        WGET_OPT_PREFIX     = (1 << 5),
 295        WGET_OPT_PROXY      = (1 << 6),
 296        WGET_OPT_USER_AGENT = (1 << 7),
 297        WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 8),
 298        WGET_OPT_RETRIES    = (1 << 9),
 299        WGET_OPT_nsomething = (1 << 10),
 300        WGET_OPT_HEADER     = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
 301        WGET_OPT_POST_DATA  = (1 << 12) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
 302        WGET_OPT_SPIDER     = (1 << 13) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
 303        WGET_OPT_NO_CHECK_CERT = (1 << 14) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
 304        WGET_OPT_POST_FILE  = (1 << 15) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
 305        /* hijack this bit for other than opts purposes: */
 306        WGET_NO_FTRUNCATE   = (1 << 31)
 307};
 308
 309#define WGET_OPT_POST (WGET_OPT_POST_DATA | WGET_OPT_POST_FILE)
 310
 311enum {
 312        PROGRESS_START = -1,
 313        PROGRESS_END   = 0,
 314        PROGRESS_BUMP  = 1,
 315};
 316#if ENABLE_FEATURE_WGET_STATUSBAR
 317static void progress_meter(int flag)
 318{
 319        int notty;
 320
 321        if (option_mask32 & WGET_OPT_QUIET)
 322                return;
 323
 324        /* Don't save progress to log file */
 325        if (G.log_fd >= 0)
 326                return;
 327
 328        if (flag == PROGRESS_START)
 329                bb_progress_init(&G.pmt, G.curfile);
 330
 331        notty = bb_progress_update(&G.pmt,
 332                        G.beg_range,
 333                        G.transferred,
 334                        (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
 335        );
 336
 337        if (flag == PROGRESS_END) {
 338                bb_progress_free(&G.pmt);
 339                if (notty == 0)
 340                        bb_putchar_stderr('\n'); /* it's tty */
 341                G.transferred = 0;
 342        }
 343}
 344#else
 345static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) {}
 346#endif
 347
 348
 349/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 350 * local addresses can have a scope identifier to specify the
 351 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 352 * identifier is only valid on a single node.
 353 *
 354 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 355 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 356 * in the Host header as invalid requests, see
 357 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 358 */
 359static void strip_ipv6_scope_id(char *host)
 360{
 361        char *scope, *cp;
 362
 363        /* bbox wget actually handles IPv6 addresses without [], like
 364         * wget "http://::1/xxx", but this is not standard.
 365         * To save code, _here_ we do not support it. */
 366
 367        if (host[0] != '[')
 368                return; /* not IPv6 */
 369
 370        scope = strchr(host, '%');
 371        if (!scope)
 372                return;
 373
 374        /* Remove the IPv6 zone identifier from the host address */
 375        cp = strchr(host, ']');
 376        if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
 377                /* malformed address (not "[xx]:nn" or "[xx]") */
 378                return;
 379        }
 380
 381        /* cp points to "]...", scope points to "%eth0]..." */
 382        overlapping_strcpy(scope, cp);
 383}
 384
 385#if ENABLE_FEATURE_WGET_AUTHENTICATION
 386/* Base64-encode character string. */
 387static char *base64enc(const char *str)
 388{
 389        /* paranoia */
 390        unsigned len = strnlen(str, sizeof(G.wget_buf)/4*3 - 10);
 391        bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
 392        return G.wget_buf;
 393}
 394#endif
 395
 396#if ENABLE_FEATURE_WGET_TIMEOUT
 397static void alarm_handler(int sig UNUSED_PARAM)
 398{
 399        /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
 400        if (G.die_if_timed_out)
 401                bb_simple_error_msg_and_die("download timed out");
 402}
 403static void set_alarm(void)
 404{
 405        if (G.timeout_seconds) {
 406                alarm(G.timeout_seconds);
 407                G.die_if_timed_out = 1;
 408        }
 409}
 410# define clear_alarm() ((void)(G.die_if_timed_out = 0))
 411#else
 412# define set_alarm()   ((void)0)
 413# define clear_alarm() ((void)0)
 414#endif
 415
 416#if ENABLE_FEATURE_WGET_OPENSSL
 417/*
 418 * is_ip_address() attempts to verify whether or not a string
 419 * contains an IPv4 or IPv6 address (vs. an FQDN).  The result
 420 * of inet_pton() can be used to determine this.
 421 */
 422static int is_ip_address(const char *string)
 423{
 424        struct sockaddr_in sa;
 425
 426        int result = inet_pton(AF_INET, string, &(sa.sin_addr));
 427# if ENABLE_FEATURE_IPV6
 428        if (result == 0) {
 429                struct sockaddr_in6 sa6;
 430                result = inet_pton(AF_INET6, string, &(sa6.sin6_addr));
 431        }
 432# endif
 433        return (result == 1);
 434}
 435#endif
 436
 437static FILE *open_socket(len_and_sockaddr *lsa)
 438{
 439        int fd;
 440        FILE *fp;
 441
 442        set_alarm();
 443        fd = xconnect_stream(lsa);
 444        clear_alarm();
 445
 446        /* glibc 2.4 seems to try seeking on it - ??! */
 447        /* hopefully it understands what ESPIPE means... */
 448        fp = fdopen(fd, "r+");
 449        if (!fp)
 450                bb_die_memory_exhausted();
 451
 452        return fp;
 453}
 454
 455/* We balk at any control chars in other side's messages.
 456 * This prevents nasty surprises (e.g. ESC sequences) in "Location:" URLs
 457 * and error messages.
 458 *
 459 * The only exception is tabs, which are converted to (one) space:
 460 * HTTP's "headers: <whitespace> values" may have those.
 461 */
 462static char* sanitize_string(char *s)
 463{
 464        unsigned char *p = (void *) s;
 465        while (*p) {
 466                if (*p < ' ') {
 467                        if (*p != '\t')
 468                                break;
 469                        *p = ' ';
 470                }
 471                p++;
 472        }
 473        *p = '\0';
 474        return s;
 475}
 476
 477/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
 478static char fgets_trim_sanitize(FILE *fp, const char *fmt)
 479{
 480        char c;
 481        char *buf_ptr;
 482
 483        set_alarm();
 484        if (fgets(G.wget_buf, sizeof(G.wget_buf), fp) == NULL)
 485                bb_simple_perror_msg_and_die("error getting response");
 486        clear_alarm();
 487
 488        buf_ptr = strchrnul(G.wget_buf, '\n');
 489        c = *buf_ptr;
 490#if 1
 491        /* Disallow any control chars: trim at first char < 0x20 */
 492        sanitize_string(G.wget_buf);
 493#else
 494        *buf_ptr = '\0';
 495        buf_ptr = strchrnul(G.wget_buf, '\r');
 496        *buf_ptr = '\0';
 497#endif
 498
 499        log_io("< %s", G.wget_buf);
 500
 501        if (fmt && (option_mask32 & WGET_OPT_SERVER_RESPONSE))
 502                fprintf(stderr, fmt, G.wget_buf);
 503
 504        return c;
 505}
 506
 507#if ENABLE_FEATURE_WGET_FTP
 508static int ftpcmd(const char *s1, const char *s2, FILE *fp)
 509{
 510        int result;
 511        if (s1) {
 512                if (!s2)
 513                        s2 = "";
 514                fprintf(fp, "%s%s\r\n", s1, s2);
 515                /* With --server-response, wget also shows its ftp commands */
 516                if (option_mask32 & WGET_OPT_SERVER_RESPONSE)
 517                        fprintf(stderr, "--> %s%s\n\n", s1, s2);
 518                fflush(fp);
 519                log_io("> %s%s", s1, s2);
 520        }
 521
 522        /* Read until "Nxx something" is received */
 523        G.wget_buf[3] = 0;
 524        do {
 525                fgets_trim_sanitize(fp, "%s\n");
 526        } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
 527
 528        G.wget_buf[3] = '\0';
 529        result = xatoi_positive(G.wget_buf);
 530        G.wget_buf[3] = ' ';
 531        return result;
 532}
 533#endif
 534
 535static void parse_url(const char *src_url, struct host_info *h)
 536{
 537        char *url, *p, *sp;
 538
 539        free(h->allocated);
 540        h->allocated = url = xstrdup(src_url);
 541
 542        h->protocol = P_HTTP;
 543        p = strstr(url, "://");
 544        if (p) {
 545                *p = '\0';
 546                h->host = p + 3;
 547#if ENABLE_FEATURE_WGET_FTP
 548                if (strcmp(url, P_FTP) == 0) {
 549                        h->port = bb_lookup_std_port(P_FTP, "tcp", 21);
 550                        h->protocol = P_FTP;
 551                } else
 552#endif
 553#if FTPS_SUPPORTED
 554                if (strcmp(url, P_FTPS) == 0) {
 555                        h->port = bb_lookup_std_port(P_FTPS, "tcp", 990);
 556                        h->protocol = P_FTPS;
 557                } else
 558#endif
 559#if SSL_SUPPORTED
 560                if (strcmp(url, P_HTTPS) == 0) {
 561                        h->port = bb_lookup_std_port(P_HTTPS, "tcp", 443);
 562                        h->protocol = P_HTTPS;
 563                } else
 564#endif
 565                if (strcmp(url, P_HTTP) == 0) {
 566                        goto http;
 567                } else {
 568                        *p = ':';
 569                        bb_error_msg_and_die("not an http or ftp url: %s", url);
 570                }
 571        } else {
 572                // GNU wget is user-friendly and falls back to http://
 573                h->host = url;
 574 http:
 575                h->port = bb_lookup_std_port(P_HTTP, "tcp", 80);
 576        }
 577
 578        // FYI:
 579        // "Real" wget 'http://busybox.net?var=a/b' sends this request:
 580        //   'GET /?var=a/b HTTP/1.0'
 581        //   and saves 'index.html?var=a%2Fb' (we save 'b')
 582        // wget 'http://busybox.net?login=john@doe':
 583        //   request: 'GET /?login=john@doe HTTP/1.0'
 584        //   saves: 'index.html?login=john@doe' (we save 'login=john@doe')
 585        // wget 'http://busybox.net#test/test':
 586        //   request: 'GET / HTTP/1.0'
 587        //   saves: 'index.html' (we save 'test')
 588        //
 589        // We also don't add unique .N suffix if file exists...
 590        sp = strchr(h->host, '/');
 591        p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
 592        p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
 593        if (!sp) {
 594                h->path = "";
 595        } else if (*sp == '/') {
 596                *sp = '\0';
 597                h->path = sp + 1;
 598        } else {
 599                // sp points to '#' or '?'
 600                // Note:
 601                // http://busybox.net?login=john@doe is a valid URL
 602                // (without '/' between ".net" and "?"),
 603                // can't store NUL at sp[-1] - this destroys hostname.
 604                *sp++ = '\0';
 605                h->path = sp;
 606        }
 607
 608        sp = strrchr(h->host, '@');
 609        if (sp != NULL) {
 610                // URL-decode "user:password" string before base64-encoding:
 611                // wget http://test:my%20pass@example.com should send
 612                // Authorization: Basic dGVzdDpteSBwYXNz
 613                // which decodes to "test:my pass".
 614                // Standard wget and curl do this too.
 615                *sp = '\0';
 616                free(h->user);
 617                h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
 618                h->host = sp + 1;
 619        }
 620        /* else: h->user remains NULL, or as set by original request
 621         * before redirect (if we are here after a redirect).
 622         */
 623}
 624
 625static char *get_sanitized_hdr(FILE *fp)
 626{
 627        char *s, *hdrval;
 628        int c;
 629
 630        /* retrieve header line */
 631        c = fgets_trim_sanitize(fp, "  %s\n");
 632
 633        /* end of the headers? */
 634        if (G.wget_buf[0] == '\0')
 635                return NULL;
 636
 637        /* convert the header name to lower case */
 638        for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
 639                /*
 640                 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
 641                 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
 642                 * "A-Z" maps to "a-z".
 643                 * "@[\]" can't occur in header names.
 644                 * "^_" maps to "~,DEL" (which is wrong).
 645                 * "^" was never seen yet, "_" was seen from web.archive.org
 646                 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
 647                 */
 648                *s |= 0x20;
 649        }
 650
 651        /* verify we are at the end of the header name */
 652        if (*s != ':')
 653                bb_error_msg_and_die("bad header line: %s", G.wget_buf);
 654
 655        /* locate the start of the header value */
 656        *s++ = '\0';
 657        hdrval = skip_whitespace(s);
 658
 659        if (c != '\n') {
 660                /* Rats! The buffer isn't big enough to hold the entire header value */
 661                while (c = getc(fp), c != EOF && c != '\n')
 662                        continue;
 663        }
 664
 665        return hdrval;
 666}
 667
 668static void reset_beg_range_to_zero(void)
 669{
 670        bb_simple_error_msg("restart failed");
 671        G.beg_range = 0;
 672        xlseek(G.output_fd, 0, SEEK_SET);
 673        /* Done at the end instead: */
 674        /* ftruncate(G.output_fd, 0); */
 675}
 676
 677#if ENABLE_FEATURE_WGET_OPENSSL
 678static int spawn_https_helper_openssl(const char *host, unsigned port)
 679{
 680        char *allocated = NULL;
 681        char *servername;
 682        int sp[2];
 683        int pid;
 684        IF_FEATURE_WGET_HTTPS(volatile int child_failed = 0;)
 685
 686        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
 687                /* Kernel can have AF_UNIX support disabled */
 688                bb_simple_perror_msg_and_die("socketpair");
 689
 690        if (!strchr(host, ':'))
 691                host = allocated = xasprintf("%s:%u", host, port);
 692        servername = xstrdup(host);
 693        strrchr(servername, ':')[0] = '\0';
 694
 695        fflush_all();
 696        pid = xvfork();
 697        if (pid == 0) {
 698                /* Child */
 699                char *argv[13];
 700                char **argp;
 701
 702                close(sp[0]);
 703                xmove_fd(sp[1], 0);
 704                xdup2(0, 1);
 705                /*
 706                 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
 707                 * It prints some debug stuff on stderr, don't know how to suppress it.
 708                 * Work around by dev-nulling stderr. We lose all error messages :(
 709                 */
 710                xmove_fd(2, 3);
 711                xopen("/dev/null", O_RDWR);
 712                memset(&argv, 0, sizeof(argv));
 713                argv[0] = (char*)"openssl";
 714                argv[1] = (char*)"s_client";
 715                argv[2] = (char*)"-quiet";
 716                argv[3] = (char*)"-connect";
 717                argv[4] = (char*)host;
 718                /*
 719                 * Per RFC 6066 Section 3, the only permitted values in the
 720                 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
 721                 * IPv4 and IPv6 addresses, port numbers are not allowed.
 722                 */
 723                argp = &argv[5];
 724                if (!is_ip_address(servername)) {
 725                        *argp++ = (char*)"-servername"; //[5]
 726                        *argp++ = (char*)servername;    //[6]
 727                }
 728                if (!(option_mask32 & WGET_OPT_NO_CHECK_CERT)) {
 729                        /* Abort on bad server certificate */
 730                        *argp++ = (char*)"-verify";              //[7]
 731                        *argp++ = (char*)"100";                  //[8]
 732                        *argp++ = (char*)"-verify_return_error"; //[9]
 733                        if (!is_ip_address(servername)) {
 734                                *argp++ = (char*)"-verify_hostname"; //[10]
 735                                *argp++ = (char*)servername;         //[11]
 736                        } else {
 737                                *argp++ = (char*)"-verify_ip"; //[10]
 738                                *argp++ = (char*)host;         //[11]
 739                        }
 740                }
 741                //[12] (or earlier) is NULL terminator
 742
 743                BB_EXECVP(argv[0], argv);
 744                xmove_fd(3, 2);
 745# if ENABLE_FEATURE_WGET_HTTPS
 746                child_failed = 1;
 747                xfunc_die();
 748# else
 749                bb_perror_msg_and_die("can't execute '%s'", argv[0]);
 750# endif
 751                /* notreached */
 752        }
 753
 754        /* Parent */
 755        free(servername);
 756        free(allocated);
 757        close(sp[1]);
 758# if ENABLE_FEATURE_WGET_HTTPS
 759        if (child_failed) {
 760                close(sp[0]);
 761                return -1;
 762        }
 763# endif
 764        return sp[0];
 765}
 766#endif
 767
 768#if ENABLE_FEATURE_WGET_HTTPS
 769static void spawn_ssl_client(const char *host, int network_fd, int flags)
 770{
 771        int sp[2];
 772        int pid;
 773        char *servername, *p;
 774
 775        if (!(option_mask32 & WGET_OPT_NO_CHECK_CERT)) {
 776                option_mask32 |= WGET_OPT_NO_CHECK_CERT;
 777                bb_simple_error_msg("note: TLS certificate validation not implemented");
 778        }
 779
 780        servername = xstrdup(host);
 781        p = strrchr(servername, ':');
 782        if (p) *p = '\0';
 783
 784        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
 785                /* Kernel can have AF_UNIX support disabled */
 786                bb_simple_perror_msg_and_die("socketpair");
 787
 788        fflush_all();
 789        pid = BB_MMU ? xfork() : xvfork();
 790        if (pid == 0) {
 791                /* Child */
 792                close(sp[0]);
 793                xmove_fd(sp[1], 0);
 794                xdup2(0, 1);
 795                if (BB_MMU) {
 796                        tls_state_t *tls = new_tls_state();
 797                        tls->ifd = tls->ofd = network_fd;
 798                        tls_handshake(tls, servername);
 799                        tls_run_copy_loop(tls, flags);
 800                        exit(0);
 801                } else {
 802                        char *argv[6];
 803
 804                        xmove_fd(network_fd, 3);
 805                        argv[0] = (char*)"ssl_client";
 806                        argv[1] = (char*)"-s3";
 807                        //TODO: if (!is_ip_address(servername))...
 808                        argv[2] = (char*)"-n";
 809                        argv[3] = servername;
 810                        argv[4] = (flags & TLSLOOP_EXIT_ON_LOCAL_EOF ? (char*)"-e" : NULL);
 811                        argv[5] = NULL;
 812                        BB_EXECVP(argv[0], argv);
 813                        bb_perror_msg_and_die("can't execute '%s'", argv[0]);
 814                }
 815                /* notreached */
 816        }
 817
 818        /* Parent */
 819        free(servername);
 820        close(sp[1]);
 821        xmove_fd(sp[0], network_fd);
 822}
 823#endif
 824
 825#if ENABLE_FEATURE_WGET_FTP
 826static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
 827{
 828        FILE *sfp;
 829        char *pass;
 830        int port;
 831
 832        sfp = open_socket(lsa);
 833#if FTPS_SUPPORTED
 834        if (target->protocol == P_FTPS)
 835                spawn_ssl_client(target->host, fileno(sfp), TLSLOOP_EXIT_ON_LOCAL_EOF);
 836#endif
 837
 838        if (ftpcmd(NULL, NULL, sfp) != 220)
 839                bb_simple_error_msg_and_die(G.wget_buf);
 840                /* note: ftpcmd() sanitizes G.wget_buf, ok to print */
 841
 842        /* Split username:password pair */
 843        pass = (char*)"busybox"; /* password for "anonymous" */
 844        if (target->user) {
 845                pass = strchr(target->user, ':');
 846                if (pass)
 847                        *pass++ = '\0';
 848        }
 849
 850        /* Log in */
 851        switch (ftpcmd("USER ", target->user ?: "anonymous", sfp)) {
 852        case 230:
 853                break;
 854        case 331:
 855                if (ftpcmd("PASS ", pass, sfp) == 230)
 856                        break;
 857                /* fall through (failed login) */
 858        default:
 859                bb_error_msg_and_die("ftp login: %s", G.wget_buf);
 860        }
 861
 862        ftpcmd("TYPE I", NULL, sfp);
 863
 864        /* Query file size */
 865        if (ftpcmd("SIZE ", target->path, sfp) == 213) {
 866                G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
 867                if (G.content_len < 0 || errno) {
 868                        bb_error_msg_and_die("bad SIZE value '%s'", G.wget_buf + 4);
 869                }
 870                G.got_clen = 1;
 871        }
 872
 873        /* Enter passive mode */
 874        if (ENABLE_FEATURE_IPV6 && ftpcmd("EPSV", NULL, sfp) == 229) {
 875                /* good */
 876        } else
 877        if (ftpcmd("PASV", NULL, sfp) != 227) {
 878 pasv_error:
 879                bb_error_msg_and_die("bad response to %s: %s", "PASV", G.wget_buf);
 880        }
 881        port = parse_pasv_epsv(G.wget_buf);
 882        if (port < 0)
 883                goto pasv_error;
 884
 885        set_nport(&lsa->u.sa, htons(port));
 886
 887        *dfpp = open_socket(lsa);
 888
 889#if FTPS_SUPPORTED
 890        if (target->protocol == P_FTPS) {
 891                /* "PROT P" enables encryption of data stream.
 892                 * Without it (or with "PROT C"), data is sent unencrypted.
 893                 */
 894                if (ftpcmd("PROT P", NULL, sfp) == 200)
 895                        spawn_ssl_client(target->host, fileno(*dfpp), /*flags*/ 0);
 896        }
 897#endif
 898
 899        if (G.beg_range != 0) {
 900                sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
 901                if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
 902                        G.content_len -= G.beg_range;
 903                else
 904                        reset_beg_range_to_zero();
 905        }
 906
 907//TODO: needs ftp-escaping 0xff and '\n' bytes here.
 908//Or disallow '\n' altogether via sanitize_string() in parse_url().
 909//But 0xff's are possible in valid utf8 filenames.
 910        if (ftpcmd("RETR ", target->path, sfp) > 150)
 911                bb_error_msg_and_die("bad response to %s: %s", "RETR", G.wget_buf);
 912
 913        return sfp;
 914}
 915#endif
 916
 917static void NOINLINE retrieve_file_data(FILE *dfp)
 918{
 919#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 920# if ENABLE_FEATURE_WGET_TIMEOUT
 921        unsigned second_cnt = G.timeout_seconds;
 922# endif
 923        struct pollfd polldata;
 924
 925        polldata.fd = fileno(dfp);
 926        polldata.events = POLLIN | POLLPRI;
 927#endif
 928        if (!(option_mask32 & WGET_OPT_QUIET)) {
 929                if (G.output_fd == 1)
 930                        fprintf(stderr, "writing to stdout\n");
 931                else
 932                        fprintf(stderr, "saving to '%s'\n", G.fname_out);
 933        }
 934        progress_meter(PROGRESS_START);
 935
 936        if (G.chunked)
 937                goto get_clen;
 938
 939        /* Loops only if chunked */
 940        while (1) {
 941
 942#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 943                /* Must use nonblocking I/O, otherwise fread will loop
 944                 * and *block* until it reads full buffer,
 945                 * which messes up progress bar and/or timeout logic.
 946                 * Because of nonblocking I/O, we need to dance
 947                 * very carefully around EAGAIN. See explanation at
 948                 * clearerr() calls.
 949                 */
 950                ndelay_on(polldata.fd);
 951#endif
 952                while (1) {
 953                        int n;
 954                        unsigned rdsz;
 955
 956#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 957                        /* fread internally uses read loop, which in our case
 958                         * is usually exited when we get EAGAIN.
 959                         * In this case, libc sets error marker on the stream.
 960                         * Need to clear it before next fread to avoid possible
 961                         * rare false positive ferror below. Rare because usually
 962                         * fread gets more than zero bytes, and we don't fall
 963                         * into if (n <= 0) ...
 964                         */
 965                        clearerr(dfp);
 966#endif
 967                        errno = 0;
 968                        rdsz = sizeof(G.wget_buf);
 969                        if (G.got_clen) {
 970                                if (G.content_len < (off_t)sizeof(G.wget_buf)) {
 971                                        if ((int)G.content_len <= 0)
 972                                                break;
 973                                        rdsz = (unsigned)G.content_len;
 974                                }
 975                        }
 976                        n = fread(G.wget_buf, 1, rdsz, dfp);
 977
 978                        if (n > 0) {
 979                                xwrite(G.output_fd, G.wget_buf, n);
 980#if ENABLE_FEATURE_WGET_STATUSBAR
 981                                G.transferred += n;
 982#endif
 983                                if (G.got_clen) {
 984                                        G.content_len -= n;
 985                                        if (G.content_len == 0)
 986                                                break;
 987                                }
 988#if ENABLE_FEATURE_WGET_TIMEOUT
 989                                second_cnt = G.timeout_seconds;
 990#endif
 991                                goto bump;
 992                        }
 993
 994                        /* n <= 0.
 995                         * man fread:
 996                         * If error occurs, or EOF is reached, the return value
 997                         * is a short item count (or zero).
 998                         * fread does not distinguish between EOF and error.
 999                         */
1000                        if (errno != EAGAIN) {
1001                                if (ferror(dfp)) {
1002                                        progress_meter(PROGRESS_END);
1003                                        bb_simple_perror_msg_and_die(bb_msg_read_error);
1004                                }
1005                                break; /* EOF, not error */
1006                        }
1007
1008#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
1009                        /* It was EAGAIN. There is no data. Wait up to one second
1010                         * then abort if timed out, or update the bar and try reading again.
1011                         */
1012                        if (safe_poll(&polldata, 1, 1000) == 0) {
1013# if ENABLE_FEATURE_WGET_TIMEOUT
1014                                if (second_cnt != 0 && --second_cnt == 0) {
1015                                        progress_meter(PROGRESS_END);
1016                                        bb_simple_error_msg_and_die("download timed out");
1017                                }
1018# endif
1019                                /* We used to loop back to poll here,
1020                                 * but there is no great harm in letting fread
1021                                 * to try reading anyway.
1022                                 */
1023                        }
1024#endif
1025 bump:
1026                        /* Need to do it _every_ second for "stalled" indicator
1027                         * to be shown properly.
1028                         */
1029                        progress_meter(PROGRESS_BUMP);
1030                } /* while (reading data) */
1031
1032#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
1033                clearerr(dfp);
1034                ndelay_off(polldata.fd); /* else fgets can get very unhappy */
1035#endif
1036                if (!G.chunked)
1037                        break;
1038
1039                /* Each chunk ends with "\r\n" - eat it */
1040                fgets_trim_sanitize(dfp, NULL);
1041 get_clen:
1042                /* chunk size format is "HEXNUM[;name[=val]]\r\n" */
1043                fgets_trim_sanitize(dfp, NULL);
1044                errno = 0;
1045                G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
1046                /*
1047                 * Had a bug with inputs like "ffffffff0001f400"
1048                 * smashing the heap later. Ensure >= 0.
1049                 */
1050                if (G.content_len < 0 || errno)
1051                        bb_error_msg_and_die("bad chunk length '%s'", G.wget_buf);
1052                if (G.content_len == 0)
1053                        break; /* all done! */
1054                G.got_clen = 1;
1055                /*
1056                 * Note that fgets may result in some data being buffered in dfp.
1057                 * We loop back to fread, which will retrieve this data.
1058                 * Also note that code has to be arranged so that fread
1059                 * is done _before_ one-second poll wait - poll doesn't know
1060                 * about stdio buffering and can result in spurious one second waits!
1061                 */
1062        }
1063
1064        /* Draw full bar and free its resources */
1065        G.chunked = 0;  /* makes it show 100% even for chunked download */
1066        G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
1067        progress_meter(PROGRESS_END);
1068        if (G.content_len != 0) {
1069                bb_simple_perror_msg_and_die("connection closed prematurely");
1070                /* GNU wget says "DATE TIME (NN MB/s) - Connection closed at byte NNN. Retrying." */
1071        }
1072
1073        /* If -c failed, we restart from the beginning,
1074         * but we do not truncate file then, we do it only now, at the end.
1075         * This lets user to ^C if his 99% complete 10 GB file download
1076         * failed to restart *without* losing the almost complete file.
1077         */
1078        {
1079                off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
1080                if (pos != (off_t)-1) {
1081                        /* do not truncate if -O- is in use, a user complained about
1082                         * "wget -qO- 'http://example.com/empty' >>FILE" truncating FILE.
1083                         */
1084                        if (!(option_mask32 & WGET_NO_FTRUNCATE))
1085                                ftruncate(G.output_fd, pos);
1086                }
1087        }
1088
1089        if (!(option_mask32 & WGET_OPT_QUIET)) {
1090                if (G.output_fd == 1)
1091                        fprintf(stderr, "written to stdout\n");
1092                else
1093                        fprintf(stderr, "'%s' saved\n", G.fname_out);
1094        }
1095}
1096
1097static void download_one_url(const char *url)
1098{
1099        bool use_proxy;                 /* Use proxies if env vars are set  */
1100        int redir_limit;
1101        len_and_sockaddr *lsa;
1102        FILE *sfp;                      /* socket to web/ftp server         */
1103        FILE *dfp;                      /* socket to ftp server (data)      */
1104        char *fname_out_alloc;
1105        char *redirected_path = NULL;
1106        struct host_info server;
1107        struct host_info target;
1108
1109        server.allocated = NULL;
1110        target.allocated = NULL;
1111        server.user = NULL;
1112        target.user = NULL;
1113
1114        parse_url(url, &target);
1115
1116        /* Use the proxy if necessary */
1117        use_proxy = (strcmp(G.proxy_flag, "off") != 0);
1118        if (use_proxy) {
1119                char *proxy = getenv(target.protocol[0] == 'f' ? "ftp_proxy" : "http_proxy");
1120//FIXME: what if protocol is https? Ok to use http_proxy?
1121                use_proxy = (proxy && proxy[0]);
1122                if (use_proxy)
1123                        parse_url(proxy, &server);
1124        }
1125        if (!use_proxy) {
1126                server.protocol = target.protocol;
1127                server.port = target.port;
1128                if (ENABLE_FEATURE_IPV6) {
1129                        //free(server.allocated); - can't be non-NULL
1130                        server.host = server.allocated = xstrdup(target.host);
1131                } else {
1132                        server.host = target.host;
1133                }
1134        }
1135
1136        if (ENABLE_FEATURE_IPV6)
1137                strip_ipv6_scope_id(target.host);
1138
1139        /* If there was no -O FILE, guess output filename */
1140        fname_out_alloc = NULL;
1141        if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1142                G.fname_out = bb_get_last_path_component_nostrip(target.path);
1143                /* handle "wget http://kernel.org//" */
1144                if (G.fname_out[0] == '/' || !G.fname_out[0])
1145                        G.fname_out = (char*)"index.html";
1146                /* -P DIR is considered only if there was no -O FILE */
1147                if (G.dir_prefix)
1148                        G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
1149                else {
1150                        /* redirects may free target.path later, need to make a copy */
1151                        G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
1152                }
1153        }
1154#if ENABLE_FEATURE_WGET_STATUSBAR
1155        G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
1156#endif
1157
1158        /* Determine where to start transfer */
1159        G.beg_range = 0;
1160        if (option_mask32 & WGET_OPT_CONTINUE) {
1161                G.output_fd = open(G.fname_out, O_WRONLY);
1162                if (G.output_fd >= 0) {
1163                        G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
1164                }
1165                /* File doesn't exist. We do not create file here yet.
1166                 * We are not sure it exists on remote side */
1167        }
1168
1169        redir_limit = 16;
1170 resolve_lsa:
1171        lsa = xhost2sockaddr(server.host, server.port);
1172        if (!(option_mask32 & WGET_OPT_QUIET)) {
1173                char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
1174                fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
1175                free(s);
1176        }
1177 establish_session:
1178        /*G.content_len = 0; - redundant, got_clen = 0 is enough */
1179        G.got_clen = 0;
1180        G.chunked = 0;
1181        if (use_proxy || target.protocol[0] != 'f' /*not ftp[s]*/) {
1182                /*
1183                 *  HTTP session
1184                 */
1185                char *str;
1186                int status;
1187
1188                /* Open socket to http(s) server */
1189#if ENABLE_FEATURE_WGET_OPENSSL
1190                /* openssl (and maybe internal TLS) support is configured */
1191                if (server.protocol == P_HTTPS) {
1192                        /* openssl-based helper
1193                         * Inconvenient API since we can't give it an open fd
1194                         */
1195                        int fd = spawn_https_helper_openssl(server.host, server.port);
1196# if ENABLE_FEATURE_WGET_HTTPS
1197                        if (fd < 0) { /* no openssl? try internal */
1198                                sfp = open_socket(lsa);
1199                                spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
1200                                goto socket_opened;
1201                        }
1202# else
1203                        /* We don't check for exec("openssl") failure in this case */
1204# endif
1205                        sfp = fdopen(fd, "r+");
1206                        if (!sfp)
1207                                bb_die_memory_exhausted();
1208                        goto socket_opened;
1209                }
1210                sfp = open_socket(lsa);
1211 socket_opened:
1212#elif ENABLE_FEATURE_WGET_HTTPS
1213                /* Only internal TLS support is configured */
1214                sfp = open_socket(lsa);
1215                if (server.protocol == P_HTTPS)
1216                        spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
1217#else
1218                /* ssl (https) support is not configured */
1219                sfp = open_socket(lsa);
1220#endif
1221                /* Send HTTP request */
1222                if (use_proxy) {
1223                        SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
1224                                target.protocol, target.host,
1225                                target.path);
1226                } else {
1227                        SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
1228                                (option_mask32 & WGET_OPT_POST) ? "POST" : "GET",
1229                                target.path);
1230                }
1231                if (!USR_HEADER_HOST)
1232                        SENDFMT(sfp, "Host: %s\r\n", target.host);
1233                if (!USR_HEADER_USER_AGENT)
1234                        SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
1235
1236                /* Ask server to close the connection as soon as we are done
1237                 * (IOW: we do not intend to send more requests)
1238                 */
1239                SENDFMT(sfp, "Connection: close\r\n");
1240
1241#if ENABLE_FEATURE_WGET_AUTHENTICATION
1242                if (target.user && !USR_HEADER_AUTH) {
1243                        SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
1244                                base64enc(target.user));
1245                }
1246                if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1247                        SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
1248                                base64enc(server.user));
1249                }
1250#endif
1251
1252                if (G.beg_range != 0 && !USR_HEADER_RANGE)
1253                        SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
1254
1255#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1256                if (G.extra_headers) {
1257                        log_io(G.extra_headers);
1258                        fputs(G.extra_headers, sfp);
1259                }
1260
1261                if (option_mask32 & WGET_OPT_POST_FILE) {
1262                        int fd = xopen_stdin(G.post_file);
1263                        G.post_data = xmalloc_read(fd, NULL);
1264                        close(fd);
1265                }
1266
1267                if (G.post_data) {
1268                        /* If user did not override it... */
1269                        if (!USR_HEADER_CONTENT_TYPE) {
1270                                SENDFMT(sfp,
1271                                        "Content-Type: application/x-www-form-urlencoded\r\n"
1272                                );
1273                        }
1274                        SENDFMT(sfp,
1275                                "Content-Length: %u\r\n"
1276                                "\r\n"
1277                                "%s",
1278                                (int) strlen(G.post_data), G.post_data
1279                        );
1280                } else
1281#endif
1282                {
1283                        SENDFMT(sfp, "\r\n");
1284                }
1285
1286                fflush(sfp);
1287
1288/* Tried doing this unconditionally.
1289 * Cloudflare and nginx/1.11.5 are shocked to see SHUT_WR on non-HTTPS.
1290 */
1291#if SSL_SUPPORTED
1292                if (target.protocol == P_HTTPS) {
1293                        /* If we use SSL helper, keeping our end of the socket open for writing
1294                         * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1295                         * even after child closes its copy of the fd.
1296                         * This helps:
1297                         */
1298                        shutdown(fileno(sfp), SHUT_WR);
1299                }
1300#endif
1301
1302                /*
1303                 * Retrieve HTTP response line and check for "200" status code.
1304                 */
1305 read_response:
1306                fgets_trim_sanitize(sfp, "  %s\n");
1307
1308                str = G.wget_buf;
1309                str = skip_non_whitespace(str);
1310                str = skip_whitespace(str);
1311                // FIXME: no error check
1312                // xatou wouldn't work: "200 OK"
1313                status = atoi(str);
1314                switch (status) {
1315                case 0:
1316                case 100:
1317                        while (get_sanitized_hdr(sfp) != NULL)
1318                                /* eat all remaining headers */;
1319                        goto read_response;
1320
1321                /* Success responses */
1322                case 200:
1323                        /* fall through */
1324                case 201: /* 201 Created */
1325/* "The request has been fulfilled and resulted in a new resource being created" */
1326                        /* Standard wget is reported to treat this as success */
1327                        /* fall through */
1328                case 202: /* 202 Accepted */
1329/* "The request has been accepted for processing, but the processing has not been completed" */
1330                        /* Treat as success: fall through */
1331                case 203: /* 203 Non-Authoritative Information */
1332/* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1333                        /* fall through */
1334                case 204: /* 204 No Content */
1335/*
1336Response 204 doesn't say "null file", it says "metadata
1337has changed but data didn't":
1338
1339"10.2.5 204 No Content
1340The server has fulfilled the request but does not need to return
1341an entity-body, and might want to return updated metainformation.
1342The response MAY include new or updated metainformation in the form
1343of entity-headers, which if present SHOULD be associated with
1344the requested variant.
1345
1346If the client is a user agent, it SHOULD NOT change its document
1347view from that which caused the request to be sent. This response
1348is primarily intended to allow input for actions to take place
1349without causing a change to the user agent's active document view,
1350although any new or updated metainformation SHOULD be applied
1351to the document currently in the user agent's active view.
1352
1353The 204 response MUST NOT include a message-body, and thus
1354is always terminated by the first empty line after the header fields."
1355
1356However, in real world it was observed that some web servers
1357(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1358*/
1359                        if (G.beg_range != 0) {
1360                                /* "Range:..." was not honored by the server.
1361                                 * Restart download from the beginning.
1362                                 */
1363                                reset_beg_range_to_zero();
1364                        }
1365                        break;
1366                /* 205 Reset Content ?? what to do on this ??   */
1367
1368                case 300:  /* redirection */
1369                case 301:
1370                case 302:
1371                case 303:
1372                case 307:
1373                case 308:
1374                        break;
1375
1376                case 206: /* Partial Content */
1377                        if (G.beg_range != 0)
1378                                /* "Range:..." worked. Good. */
1379                                break;
1380                        /* Partial Content even though we did not ask for it??? */
1381                        /* fall through */
1382                default:
1383                        bb_error_msg_and_die("server returned error: %s", G.wget_buf);
1384                }
1385
1386                /*
1387                 * Retrieve HTTP headers.
1388                 */
1389                while ((str = get_sanitized_hdr(sfp)) != NULL) {
1390                        static const char keywords[] ALIGN1 =
1391                                "content-length\0""transfer-encoding\0""location\0";
1392                        enum {
1393                                KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1394                        };
1395                        smalluint key;
1396
1397                        /* get_sanitized_hdr converted "FOO:" string to lowercase */
1398
1399                        /* strip trailing whitespace */
1400                        char *s = strchrnul(str, '\0') - 1;
1401                        while (s >= str && (*s == ' ' || *s == '\t')) {
1402                                *s = '\0';
1403                                s--;
1404                        }
1405                        key = index_in_strings(keywords, G.wget_buf) + 1;
1406                        if (key == KEY_content_length) {
1407                                G.content_len = BB_STRTOOFF(str, NULL, 10);
1408                                if (G.content_len < 0 || errno) {
1409                                        bb_error_msg_and_die("content-length %s is garbage", str);
1410                                }
1411                                G.got_clen = 1;
1412                                continue;
1413                        }
1414                        if (key == KEY_transfer_encoding) {
1415                                if (strcmp(str_tolower(str), "chunked") != 0)
1416                                        bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
1417                                G.chunked = 1;
1418                        }
1419                        if (key == KEY_location && status >= 300) {
1420                                if (--redir_limit == 0)
1421                                        bb_simple_error_msg_and_die("too many redirections");
1422                                fclose(sfp);
1423                                if (str[0] == '/') {
1424                                        free(redirected_path);
1425                                        target.path = redirected_path = xstrdup(str + 1);
1426                                        /* lsa stays the same: it's on the same server */
1427                                } else {
1428                                        parse_url(str, &target);
1429                                        if (!use_proxy) {
1430                                                /* server.user remains untouched */
1431                                                free(server.allocated);
1432                                                server.allocated = NULL;
1433                                                server.protocol = target.protocol;
1434                                                server.host = target.host;
1435                                                /* strip_ipv6_scope_id(target.host); - no! */
1436                                                /* we assume remote never gives us IPv6 addr with scope id */
1437                                                server.port = target.port;
1438                                                free(lsa);
1439                                                goto resolve_lsa;
1440                                        } /* else: lsa stays the same: we use proxy */
1441                                }
1442                                goto establish_session;
1443                        }
1444                }
1445//              if (status >= 300)
1446//                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
1447
1448                /* For HTTP, data is pumped over the same connection */
1449                dfp = sfp;
1450        } else {
1451#if ENABLE_FEATURE_WGET_FTP
1452                /*
1453                 *  FTP session
1454                 */
1455                sfp = prepare_ftp_session(&dfp, &target, lsa);
1456#endif
1457        }
1458
1459        free(lsa);
1460
1461        if (!(option_mask32 & WGET_OPT_SPIDER)) {
1462                if (G.output_fd < 0)
1463                        G.output_fd = xopen(G.fname_out, G.o_flags);
1464                retrieve_file_data(dfp);
1465                if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1466                        xclose(G.output_fd);
1467                        G.output_fd = -1;
1468                }
1469        } else {
1470                if (!(option_mask32 & WGET_OPT_QUIET))
1471                        fprintf(stderr, "remote file exists\n");
1472        }
1473
1474#if ENABLE_FEATURE_WGET_FTP
1475        if (dfp != sfp) {
1476                /* It's ftp. Close data connection properly */
1477                fclose(dfp);
1478                if (ftpcmd(NULL, NULL, sfp) != 226)
1479                        bb_error_msg_and_die("ftp error: %s", G.wget_buf);
1480                /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1481        }
1482#endif
1483        fclose(sfp);
1484
1485        free(server.allocated);
1486        free(target.allocated);
1487        free(server.user);
1488        free(target.user);
1489        free(fname_out_alloc);
1490        free(redirected_path);
1491}
1492
1493int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1494int wget_main(int argc UNUSED_PARAM, char **argv)
1495{
1496#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1497        static const char wget_longopts[] ALIGN1 =
1498                /* name, has_arg, val */
1499                "continue\0"         No_argument       "c"
1500                "quiet\0"            No_argument       "q"
1501                "server-response\0"  No_argument       "S"
1502                "output-document\0"  Required_argument "O"
1503                "output-file\0"      Required_argument "o"
1504                "directory-prefix\0" Required_argument "P"
1505                "proxy\0"            Required_argument "Y"
1506                "user-agent\0"       Required_argument "U"
1507IF_FEATURE_WGET_TIMEOUT(
1508                "timeout\0"          Required_argument "T")
1509                /* Ignored: */
1510IF_DESKTOP(     "tries\0"            Required_argument "t")
1511                "header\0"           Required_argument "\xff"
1512                "post-data\0"        Required_argument "\xfe"
1513                "spider\0"           No_argument       "\xfd"
1514                "no-check-certificate\0" No_argument   "\xfc"
1515                "post-file\0"        Required_argument "\xfb"
1516                /* Ignored (we always use PASV): */
1517IF_DESKTOP(     "passive-ftp\0"      No_argument       "\xf0")
1518                /* Ignored (we don't support caching) */
1519IF_DESKTOP(     "no-cache\0"         No_argument       "\xf0")
1520IF_DESKTOP(     "no-verbose\0"       No_argument       "\xf0")
1521IF_DESKTOP(     "no-clobber\0"       No_argument       "\xf0")
1522IF_DESKTOP(     "no-host-directories\0" No_argument    "\xf0")
1523IF_DESKTOP(     "no-parent\0"        No_argument       "\xf0")
1524                ;
1525# define GETOPT32 getopt32long
1526# define LONGOPTS ,wget_longopts
1527#else
1528# define GETOPT32 getopt32
1529# define LONGOPTS
1530#endif
1531
1532#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1533        llist_t *headers_llist = NULL;
1534#endif
1535
1536        INIT_G();
1537
1538#if ENABLE_FEATURE_WGET_TIMEOUT
1539        G.timeout_seconds = 900;
1540        signal(SIGALRM, alarm_handler);
1541#endif
1542        G.proxy_flag = "on";   /* use proxies if env vars are set */
1543        G.user_agent = "Wget"; /* "User-Agent" header field */
1544
1545        GETOPT32(argv, "^"
1546                "cqSO:o:P:Y:U:T:+"
1547                /*ignored:*/ "t:"
1548                /*ignored:*/ "n::"
1549                /* wget has exactly four -n<letter> opts, all of which we can ignore:
1550                 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1551                 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1552                 * -nH --no-host-directories: wget -r http://host/ won't create host/
1553                 * -np --no-parent
1554                 * "n::" above says that we accept -n[ARG].
1555                 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1556                 */
1557                "\0"
1558                "-1" /* at least one URL */
1559                IF_FEATURE_WGET_LONG_OPTIONS(":\xfe--\xfb")
1560                IF_FEATURE_WGET_LONG_OPTIONS(":\xfe--\xfe")
1561                IF_FEATURE_WGET_LONG_OPTIONS(":\xfb--\xfb")
1562                IF_FEATURE_WGET_LONG_OPTIONS(":\xff::") /* --header is a list */
1563                LONGOPTS
1564                , &G.fname_out, &G.fname_log, &G.dir_prefix,
1565                &G.proxy_flag, &G.user_agent,
1566                IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1567                NULL, /* -t RETRIES */
1568                NULL  /* -n[ARG] */
1569                IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1570                IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1571                IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_file)
1572        );
1573#if 0 /* option bits debug */
1574        if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1575        if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1576        if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1577        if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1578        if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
1579        if (option_mask32 & WGET_OPT_NO_CHECK_CERT) bb_error_msg("--no-check-certificate");
1580        if (option_mask32 & WGET_OPT_POST_FILE) bb_error_msg("--post-file");
1581        exit(0);
1582#endif
1583        argv += optind;
1584
1585#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1586        if (headers_llist) {
1587                int size = 0;
1588                char *hdr;
1589                llist_t *ll = headers_llist;
1590                while (ll) {
1591                        size += strlen(ll->data) + 2;
1592                        ll = ll->link;
1593                }
1594                G.extra_headers = hdr = xmalloc(size + 1);
1595                while (headers_llist) {
1596                        int bit;
1597                        const char *words;
1598
1599                        size = sprintf(hdr, "%s\r\n",
1600                                        (char*)llist_pop(&headers_llist));
1601                        /* a bit like index_in_substrings but don't match full key */
1602                        bit = 1;
1603                        words = wget_user_headers;
1604                        while (*words) {
1605                                if (strstr(hdr, words) == hdr) {
1606                                        G.user_headers |= bit;
1607                                        break;
1608                                }
1609                                bit <<= 1;
1610                                words += strlen(words) + 1;
1611                        }
1612                        hdr += size;
1613                }
1614        }
1615#endif
1616
1617        G.output_fd = -1;
1618        G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1619        if (G.fname_out) { /* -O FILE ? */
1620                if (LONE_DASH(G.fname_out)) { /* -O - ? */
1621                        G.output_fd = 1;
1622                        option_mask32 = (option_mask32 & (~WGET_OPT_CONTINUE)) | WGET_NO_FTRUNCATE;
1623                }
1624                /* compat with wget: -O FILE can overwrite */
1625                G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1626        }
1627
1628        G.log_fd = -1;
1629        if (G.fname_log) { /* -o FILE ? */
1630                if (!LONE_DASH(G.fname_log)) { /* not -o - ? */
1631                        /* compat with wget: -o FILE can overwrite */
1632                        G.log_fd = xopen(G.fname_log, O_WRONLY | O_CREAT | O_TRUNC);
1633                        /* Redirect only stderr to log file, so -O - will work */
1634                        xdup2(G.log_fd, STDERR_FILENO);
1635                }
1636        }
1637
1638        while (*argv)
1639                download_one_url(*argv++);
1640
1641        if (G.output_fd >= 0)
1642                xclose(G.output_fd);
1643
1644        if (G.log_fd >= 0)
1645                xclose(G.log_fd);
1646
1647#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1648        free(G.extra_headers);
1649#endif
1650        FINI_G();
1651
1652        return EXIT_SUCCESS;
1653}
1654