busybox/networking/wget.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * wget - retrieve a file using HTTP or FTP
   4 *
   5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
   6 * Licensed under GPLv2, see file LICENSE in this source tree.
   7 *
   8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
   9 * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
  10 */
  11
  12//config:config WGET
  13//config:       bool "wget"
  14//config:       default y
  15//config:       help
  16//config:         wget is a utility for non-interactive download of files from HTTP
  17//config:         and FTP servers.
  18//config:
  19//config:config FEATURE_WGET_STATUSBAR
  20//config:       bool "Enable a nifty process meter (+2k)"
  21//config:       default y
  22//config:       depends on WGET
  23//config:       help
  24//config:         Enable the transfer progress bar for wget transfers.
  25//config:
  26//config:config FEATURE_WGET_AUTHENTICATION
  27//config:       bool "Enable HTTP authentication"
  28//config:       default y
  29//config:       depends on WGET
  30//config:       help
  31//config:         Support authenticated HTTP transfers.
  32//config:
  33//config:config FEATURE_WGET_LONG_OPTIONS
  34//config:       bool "Enable long options"
  35//config:       default y
  36//config:       depends on WGET && LONG_OPTS
  37//config:       help
  38//config:         Support long options for the wget applet.
  39//config:
  40//config:config FEATURE_WGET_TIMEOUT
  41//config:       bool "Enable timeout option -T SEC"
  42//config:       default y
  43//config:       depends on WGET
  44//config:       help
  45//config:         Supports network read and connect timeouts for wget,
  46//config:         so that wget will give up and timeout, through the -T
  47//config:         command line option.
  48//config:
  49//config:         Currently only connect and network data read timeout are
  50//config:         supported (i.e., timeout is not applied to the DNS query). When
  51//config:         FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
  52//config:         will work in addition to -T.
  53//config:
  54//config:config FEATURE_WGET_OPENSSL
  55//config:       bool "Try to connect to HTTPS using openssl"
  56//config:       default y
  57//config:       depends on WGET
  58//config:       help
  59//config:         Choose how wget establishes SSL connection for https:// URLs.
  60//config:
  61//config:         Busybox itself contains no SSL code. wget will spawn
  62//config:         a helper program to talk over HTTPS.
  63//config:
  64//config:         OpenSSL has a simple SSL client for debug purposes.
  65//config:         If you select "openssl" helper, wget will effectively run:
  66//config:         "openssl s_client -quiet -connect hostname:443
  67//config:         -servername hostname 2>/dev/null" and pipe its data
  68//config:         through it. -servername is not used if hostname is numeric.
  69//config:         Note inconvenient API: host resolution is done twice,
  70//config:         and there is no guarantee openssl's idea of IPv6 address
  71//config:         format is the same as ours.
  72//config:         Another problem is that s_client prints debug information
  73//config:         to stderr, and it needs to be suppressed. This means
  74//config:         all error messages get suppressed too.
  75//config:         openssl is also a big binary, often dynamically linked
  76//config:         against ~15 libraries.
  77//config:
  78//config:config FEATURE_WGET_SSL_HELPER
  79//config:       bool "Try to connect to HTTPS using ssl_helper"
  80//config:       default y
  81//config:       depends on WGET
  82//config:       help
  83//config:         Choose how wget establishes SSL connection for https:// URLs.
  84//config:
  85//config:         Busybox itself contains no SSL code. wget will spawn
  86//config:         a helper program to talk over HTTPS.
  87//config:
  88//config:         ssl_helper is a tool which can be built statically
  89//config:         from busybox sources against a small embedded SSL library.
  90//config:         Please see networking/ssl_helper/README.
  91//config:         It does not require double host resolution and emits
  92//config:         error messages to stderr.
  93//config:
  94//config:         Precompiled static binary may be available at
  95//config:         http://busybox.net/downloads/binaries/
  96
  97//applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
  98
  99//kbuild:lib-$(CONFIG_WGET) += wget.o
 100
 101//usage:#define wget_trivial_usage
 102//usage:        IF_FEATURE_WGET_LONG_OPTIONS(
 103//usage:       "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n"
 104//usage:       "        [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
 105/* Since we ignore these opts, we don't show them in --help */
 106/* //usage:    "        [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
 107/* //usage:    "        [-nv] [-nc] [-nH] [-np]" */
 108//usage:       "        [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
 109//usage:        )
 110//usage:        IF_NOT_FEATURE_WGET_LONG_OPTIONS(
 111//usage:       "[-cq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
 112//usage:                        IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
 113//usage:        )
 114//usage:#define wget_full_usage "\n\n"
 115//usage:       "Retrieve files via HTTP or FTP\n"
 116//usage:        IF_FEATURE_WGET_LONG_OPTIONS(
 117//usage:     "\n        --spider        Spider mode - only check file existence"
 118//usage:        )
 119//usage:     "\n        -c              Continue retrieval of aborted transfer"
 120//usage:     "\n        -q              Quiet"
 121//usage:     "\n        -P DIR          Save to DIR (default .)"
 122//usage:        IF_FEATURE_WGET_TIMEOUT(
 123//usage:     "\n        -T SEC          Network read timeout is SEC seconds"
 124//usage:        )
 125//usage:     "\n        -O FILE         Save to FILE ('-' for stdout)"
 126//usage:     "\n        -U STR          Use STR for User-Agent header"
 127//usage:     "\n        -Y on/off       Use proxy"
 128
 129#include "libbb.h"
 130
 131#if 0
 132# define log_io(...) bb_error_msg(__VA_ARGS__)
 133# define SENDFMT(fp, fmt, ...) \
 134        do { \
 135                log_io("> " fmt, ##__VA_ARGS__); \
 136                fprintf(fp, fmt, ##__VA_ARGS__); \
 137        } while (0);
 138#else
 139# define log_io(...) ((void)0)
 140# define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
 141#endif
 142
 143
  /*
   * Result of parse_url(): one URL split into its parts.
   * path and host point into the buffer owned by 'allocated'
   * (path may also be the literal "").
   */
  144struct host_info {
  145        char *allocated;         /* heap copy of the URL; parse_url() frees the previous one */
  146        const char *path;        /* text after the host name; "" when the URL has none */
  147        char       *user;        /* percent-decoded "user:password", malloced; kept across re-parses that have no '@' */
  148        const char *protocol;    /* P_FTP, P_HTTP or P_HTTPS */
  149        char       *host;        /* host part of the URL, after any "user@" */
  150        int         port;        /* value returned by bb_lookup_port() for the scheme */
  151};
  /* URL scheme names. parse_url() compares the URL prefix against them
   * and stores one of these pointers in host_info.protocol. */
  152static const char P_FTP[] ALIGN1 = "ftp";
  153static const char P_HTTP[] ALIGN1 = "http";
  /* "https" is only recognised when one of the SSL helpers is configured */
  154#if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
  155static const char P_HTTPS[] ALIGN1 = "https";
  156#endif
 157
  158#if ENABLE_FEATURE_WGET_LONG_OPTIONS
  159/* User-specified headers prevent using our corresponding built-in headers.  */
  /* One bit per header name, stored in G.user_headers.
   * The two AUTH bits are multiplied by the feature switch, so they
   * evaluate to 0 when that switch is 0. */
  160enum {
  161        HDR_HOST          = (1<<0),
  162        HDR_USER_AGENT    = (1<<1),
  163        HDR_RANGE         = (1<<2),
  164        HDR_AUTH          = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
  165        HDR_PROXY_AUTH    = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
  166};
  /* NUL-separated header names, listed in the same order as the HDR_ bits
   * above (presumably entry N corresponds to bit N - confirm at the lookup site). */
  167static const char wget_user_headers[] ALIGN1 =
  168        "Host:\0"
  169        "User-Agent:\0"
  170        "Range:\0"
  171# if ENABLE_FEATURE_WGET_AUTHENTICATION
  172        "Authorization:\0"
  173        "Proxy-Authorization:\0"
  174# endif
  175        ;
  /* Nonzero when the corresponding bit is set in G.user_headers */
  176# define USR_HEADER_HOST       (G.user_headers & HDR_HOST)
  177# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
  178# define USR_HEADER_RANGE      (G.user_headers & HDR_RANGE)
  179# define USR_HEADER_AUTH       (G.user_headers & HDR_AUTH)
  180# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
  181#else /* No long options, no user-headers :( */
  182# define USR_HEADER_HOST       0
  183# define USR_HEADER_USER_AGENT 0
  184# define USR_HEADER_RANGE      0
  185# define USR_HEADER_AUTH       0
  186# define USR_HEADER_PROXY_AUTH 0
  187#endif
 188
  189/* Globals: all applet state, accessed through the G macro below */
  190struct globals {
  191        off_t content_len;        /* Bytes of the file still expected; reduced as data is written */
  192        off_t beg_range;          /* Range at which continue begins */
  193#if ENABLE_FEATURE_WGET_STATUSBAR
  194        off_t transferred;        /* Number of bytes transferred so far */
  195        const char *curfile;      /* Name of current file being transferred */
  196        bb_progress_t pmt;        /* progress meter state used by progress_meter() */
  197#endif
  198        char *dir_prefix;
  199#if ENABLE_FEATURE_WGET_LONG_OPTIONS
  200        char *post_data;
  201        char *extra_headers;
  202        unsigned char user_headers; /* Headers mentioned by the user (HDR_xxx bits) */
  203#endif
  204        char *fname_out;        /* where to direct output (-O) */
  205        const char *proxy_flag; /* Use proxies if env vars are set */
  206        const char *user_agent; /* "User-Agent" header field */
  207#if ENABLE_FEATURE_WGET_TIMEOUT
  208        unsigned timeout_seconds; /* 0 means set_alarm() arms nothing */
  209        bool die_if_timed_out;    /* set by set_alarm(), cleared by clear_alarm(), read by alarm_handler() */
  210#endif
  211        int output_fd;
  212        int o_flags;
  213        smallint chunked;         /* chunked transfer encoding */
  214        smallint got_clen;        /* got content-length: from server  */
  215        /* Local downloads do benefit from big buffer.
  216         * With 512 byte buffer, it was measured to be
  217         * an order of magnitude slower than with big one.
  218         */
  219        uint64_t just_to_align_next_member;
  220        char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024]; /* shared line/data buffer */
  221} FIX_ALIASING;
  222#define G (*ptr_to_globals)
  /* Allocate a zero-filled struct globals and publish it via ptr_to_globals */
  223#define INIT_G() do { \
  224        SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
  225} while (0)
  /* Counterpart of INIT_G() */
  226#define FINI_G() do { \
  227        FREE_PTR_TO_GLOBALS(); \
  228} while (0)
 229
 230
  231/* Must match option string! */
  /* Option bits tested against option_mask32 (see progress_meter()).
   * The last three are multiplied by the long-options switch, so they
   * evaluate to 0 when that switch is 0. */
  232enum {
  233        WGET_OPT_CONTINUE   = (1 << 0),
  234        WGET_OPT_QUIET      = (1 << 1),
  235        WGET_OPT_OUTNAME    = (1 << 2),
  236        WGET_OPT_PREFIX     = (1 << 3),
  237        WGET_OPT_PROXY      = (1 << 4),
  238        WGET_OPT_USER_AGENT = (1 << 5),
  239        WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 6),
  240        WGET_OPT_RETRIES    = (1 << 7),
  241        WGET_OPT_nsomething = (1 << 8),
  242        WGET_OPT_HEADER     = (1 << 9) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
  243        WGET_OPT_POST_DATA  = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
  244        WGET_OPT_SPIDER     = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
  245};
 246
  /* Values for the 'flag' argument of progress_meter() */
  247enum {
  248        PROGRESS_START = -1,     /* initialise the meter before the first update */
  249        PROGRESS_END   = 0,      /* final update: free the meter and print a newline */
  250        PROGRESS_BUMP  = 1,      /* update only (presumably the periodic refresh - confirm at call sites) */
  251};
 252#if ENABLE_FEATURE_WGET_STATUSBAR
 253static void progress_meter(int flag)
 254{
 255        if (option_mask32 & WGET_OPT_QUIET)
 256                return;
 257
 258        if (flag == PROGRESS_START)
 259                bb_progress_init(&G.pmt, G.curfile);
 260
 261        bb_progress_update(&G.pmt,
 262                        G.beg_range,
 263                        G.transferred,
 264                        (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
 265        );
 266
 267        if (flag == PROGRESS_END) {
 268                bb_progress_free(&G.pmt);
 269                bb_putchar_stderr('\n');
 270                G.transferred = 0;
 271        }
 272}
 273#else
 274static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
 275#endif
 276
 277
 278/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 279 * local addresses can have a scope identifier to specify the
 280 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 281 * identifier is only valid on a single node.
 282 *
 283 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 284 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 285 * in the Host header as invalid requests, see
 286 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 287 */
 288static void strip_ipv6_scope_id(char *host)
 289{
 290        char *scope, *cp;
 291
 292        /* bbox wget actually handles IPv6 addresses without [], like
 293         * wget "http://::1/xxx", but this is not standard.
 294         * To save code, _here_ we do not support it. */
 295
 296        if (host[0] != '[')
 297                return; /* not IPv6 */
 298
 299        scope = strchr(host, '%');
 300        if (!scope)
 301                return;
 302
 303        /* Remove the IPv6 zone identifier from the host address */
 304        cp = strchr(host, ']');
 305        if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
 306                /* malformed address (not "[xx]:nn" or "[xx]") */
 307                return;
 308        }
 309
 310        /* cp points to "]...", scope points to "%eth0]..." */
 311        overlapping_strcpy(scope, cp);
 312}
 313
 314#if ENABLE_FEATURE_WGET_AUTHENTICATION
 315/* Base64-encode character string. */
 316static char *base64enc(const char *str)
 317{
 318        unsigned len = strlen(str);
 319        if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
 320                len = sizeof(G.wget_buf)/4*3 - 10;
 321        bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
 322        return G.wget_buf;
 323}
 324#endif
 325
 326static char* sanitize_string(char *s)
 327{
 328        unsigned char *p = (void *) s;
 329        while (*p >= ' ')
 330                p++;
 331        *p = '\0';
 332        return s;
 333}
 334
 335#if ENABLE_FEATURE_WGET_TIMEOUT
 336static void alarm_handler(int sig UNUSED_PARAM)
 337{
 338        /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
 339        if (G.die_if_timed_out)
 340                bb_error_msg_and_die("download timed out");
 341}
 342static void set_alarm(void)
 343{
 344        if (G.timeout_seconds) {
 345                alarm(G.timeout_seconds);
 346                G.die_if_timed_out = 1;
 347        }
 348}
 349# define clear_alarm() ((void)(G.die_if_timed_out = 0))
 350#else
 351# define set_alarm()   ((void)0)
 352# define clear_alarm() ((void)0)
 353#endif
 354
 355#if ENABLE_FEATURE_WGET_OPENSSL
 356/*
 357 * is_ip_address() attempts to verify whether or not a string
 358 * contains an IPv4 or IPv6 address (vs. an FQDN).  The result
 359 * of inet_pton() can be used to determine this.
 360 *
 361 * TODO add proper error checking when inet_pton() returns -1
 362 * (some form of system error has occurred, and errno is set)
 363 */
 364static int is_ip_address(const char *string)
 365{
 366        struct sockaddr_in sa;
 367
 368        int result = inet_pton(AF_INET, string, &(sa.sin_addr));
 369# if ENABLE_FEATURE_IPV6
 370        if (result == 0) {
 371                struct sockaddr_in6 sa6;
 372                result = inet_pton(AF_INET6, string, &(sa6.sin6_addr));
 373        }
 374# endif
 375        return (result == 1);
 376}
 377#endif
 378
 379static FILE *open_socket(len_and_sockaddr *lsa)
 380{
 381        int fd;
 382        FILE *fp;
 383
 384        set_alarm();
 385        fd = xconnect_stream(lsa);
 386        clear_alarm();
 387
 388        /* glibc 2.4 seems to try seeking on it - ??! */
 389        /* hopefully it understands what ESPIPE means... */
 390        fp = fdopen(fd, "r+");
 391        if (!fp)
 392                bb_perror_msg_and_die(bb_msg_memory_exhausted);
 393
 394        return fp;
 395}
 396
 397/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
 398static char fgets_and_trim(FILE *fp)
 399{
 400        char c;
 401        char *buf_ptr;
 402
 403        set_alarm();
 404        if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
 405                bb_perror_msg_and_die("error getting response");
 406        clear_alarm();
 407
 408        buf_ptr = strchrnul(G.wget_buf, '\n');
 409        c = *buf_ptr;
 410        *buf_ptr = '\0';
 411        buf_ptr = strchrnul(G.wget_buf, '\r');
 412        *buf_ptr = '\0';
 413
 414        log_io("< %s", G.wget_buf);
 415
 416        return c;
 417}
 418
 419static int ftpcmd(const char *s1, const char *s2, FILE *fp)
 420{
 421        int result;
 422        if (s1) {
 423                if (!s2)
 424                        s2 = "";
 425                fprintf(fp, "%s%s\r\n", s1, s2);
 426                fflush(fp);
 427                log_io("> %s%s", s1, s2);
 428        }
 429
 430        do {
 431                fgets_and_trim(fp);
 432        } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
 433
 434        G.wget_buf[3] = '\0';
 435        result = xatoi_positive(G.wget_buf);
 436        G.wget_buf[3] = ' ';
 437        return result;
 438}
 439
 440static void parse_url(const char *src_url, struct host_info *h)
 441{
 442        char *url, *p, *sp;
 443
 444        free(h->allocated);
 445        h->allocated = url = xstrdup(src_url);
 446
 447        h->protocol = P_FTP;
 448        p = strstr(url, "://");
 449        if (p) {
 450                *p = '\0';
 451                h->host = p + 3;
 452                if (strcmp(url, P_FTP) == 0) {
 453                        h->port = bb_lookup_port(P_FTP, "tcp", 21);
 454                } else
 455#if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
 456                if (strcmp(url, P_HTTPS) == 0) {
 457                        h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
 458                        h->protocol = P_HTTPS;
 459                } else
 460#endif
 461                if (strcmp(url, P_HTTP) == 0) {
 462 http:
 463                        h->port = bb_lookup_port(P_HTTP, "tcp", 80);
 464                        h->protocol = P_HTTP;
 465                } else {
 466                        *p = ':';
 467                        bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
 468                }
 469        } else {
 470                // GNU wget is user-friendly and falls back to http://
 471                h->host = url;
 472                goto http;
 473        }
 474
 475        // FYI:
 476        // "Real" wget 'http://busybox.net?var=a/b' sends this request:
 477        //   'GET /?var=a/b HTTP 1.0'
 478        //   and saves 'index.html?var=a%2Fb' (we save 'b')
 479        // wget 'http://busybox.net?login=john@doe':
 480        //   request: 'GET /?login=john@doe HTTP/1.0'
 481        //   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
 482        // wget 'http://busybox.net#test/test':
 483        //   request: 'GET / HTTP/1.0'
 484        //   saves: 'index.html' (we save 'test')
 485        //
 486        // We also don't add unique .N suffix if file exists...
 487        sp = strchr(h->host, '/');
 488        p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
 489        p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
 490        if (!sp) {
 491                h->path = "";
 492        } else if (*sp == '/') {
 493                *sp = '\0';
 494                h->path = sp + 1;
 495        } else { // '#' or '?'
 496                // http://busybox.net?login=john@doe is a valid URL
 497                // memmove converts to:
 498                // http:/busybox.nett?login=john@doe...
 499                memmove(h->host - 1, h->host, sp - h->host);
 500                h->host--;
 501                sp[-1] = '\0';
 502                h->path = sp;
 503        }
 504
 505        sp = strrchr(h->host, '@');
 506        if (sp != NULL) {
 507                // URL-decode "user:password" string before base64-encoding:
 508                // wget http://test:my%20pass@example.com should send
 509                // Authorization: Basic dGVzdDpteSBwYXNz
 510                // which decodes to "test:my pass".
 511                // Standard wget and curl do this too.
 512                *sp = '\0';
 513                free(h->user);
 514                h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
 515                h->host = sp + 1;
 516        }
 517        /* else: h->user remains NULL, or as set by original request
 518         * before redirect (if we are here after a redirect).
 519         */
 520}
 521
 522static char *gethdr(FILE *fp)
 523{
 524        char *s, *hdrval;
 525        int c;
 526
 527        /* retrieve header line */
 528        c = fgets_and_trim(fp);
 529
 530        /* end of the headers? */
 531        if (G.wget_buf[0] == '\0')
 532                return NULL;
 533
 534        /* convert the header name to lower case */
 535        for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
 536                /*
 537                 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
 538                 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
 539                 * "A-Z" maps to "a-z".
 540                 * "@[\]" can't occur in header names.
 541                 * "^_" maps to "~,DEL" (which is wrong).
 542                 * "^" was never seen yet, "_" was seen from web.archive.org
 543                 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
 544                 */
 545                *s |= 0x20;
 546        }
 547
 548        /* verify we are at the end of the header name */
 549        if (*s != ':')
 550                bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
 551
 552        /* locate the start of the header value */
 553        *s++ = '\0';
 554        hdrval = skip_whitespace(s);
 555
 556        if (c != '\n') {
 557                /* Rats! The buffer isn't big enough to hold the entire header value */
 558                while (c = getc(fp), c != EOF && c != '\n')
 559                        continue;
 560        }
 561
 562        return hdrval;
 563}
 564
 565static void reset_beg_range_to_zero(void)
 566{
 567        bb_error_msg("restart failed");
 568        G.beg_range = 0;
 569        xlseek(G.output_fd, 0, SEEK_SET);
 570        /* Done at the end instead: */
 571        /* ftruncate(G.output_fd, 0); */
 572}
 573
 574static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
 575{
 576        FILE *sfp;
 577        char *str;
 578        int port;
 579
 580        if (!target->user)
 581                target->user = xstrdup("anonymous:busybox@");
 582
 583        sfp = open_socket(lsa);
 584        if (ftpcmd(NULL, NULL, sfp) != 220)
 585                bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
 586
 587        /*
 588         * Splitting username:password pair,
 589         * trying to log in
 590         */
 591        str = strchr(target->user, ':');
 592        if (str)
 593                *str++ = '\0';
 594        switch (ftpcmd("USER ", target->user, sfp)) {
 595        case 230:
 596                break;
 597        case 331:
 598                if (ftpcmd("PASS ", str, sfp) == 230)
 599                        break;
 600                /* fall through (failed login) */
 601        default:
 602                bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
 603        }
 604
 605        ftpcmd("TYPE I", NULL, sfp);
 606
 607        /*
 608         * Querying file size
 609         */
 610        if (ftpcmd("SIZE ", target->path, sfp) == 213) {
 611                G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
 612                if (G.content_len < 0 || errno) {
 613                        bb_error_msg_and_die("SIZE value is garbage");
 614                }
 615                G.got_clen = 1;
 616        }
 617
 618        /*
 619         * Entering passive mode
 620         */
 621        if (ftpcmd("PASV", NULL, sfp) != 227) {
 622 pasv_error:
 623                bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
 624        }
 625        // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
 626        // Server's IP is N1.N2.N3.N4 (we ignore it)
 627        // Server's port for data connection is P1*256+P2
 628        str = strrchr(G.wget_buf, ')');
 629        if (str) str[0] = '\0';
 630        str = strrchr(G.wget_buf, ',');
 631        if (!str) goto pasv_error;
 632        port = xatou_range(str+1, 0, 255);
 633        *str = '\0';
 634        str = strrchr(G.wget_buf, ',');
 635        if (!str) goto pasv_error;
 636        port += xatou_range(str+1, 0, 255) * 256;
 637        set_nport(&lsa->u.sa, htons(port));
 638
 639        *dfpp = open_socket(lsa);
 640
 641        if (G.beg_range != 0) {
 642                sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
 643                if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
 644                        G.content_len -= G.beg_range;
 645                else
 646                        reset_beg_range_to_zero();
 647        }
 648
 649        if (ftpcmd("RETR ", target->path, sfp) > 150)
 650                bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
 651
 652        return sfp;
 653}
 654
 655#if ENABLE_FEATURE_WGET_OPENSSL
 656static int spawn_https_helper_openssl(const char *host, unsigned port)
 657{
 658        char *allocated = NULL;
 659        char *servername;
 660        int sp[2];
 661        int pid;
 662        IF_FEATURE_WGET_SSL_HELPER(volatile int child_failed = 0;)
 663
 664        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
 665                /* Kernel can have AF_UNIX support disabled */
 666                bb_perror_msg_and_die("socketpair");
 667
 668        if (!strchr(host, ':'))
 669                host = allocated = xasprintf("%s:%u", host, port);
 670        servername = xstrdup(host);
 671        strrchr(servername, ':')[0] = '\0';
 672
 673        fflush_all();
 674        pid = xvfork();
 675        if (pid == 0) {
 676                /* Child */
 677                char *argv[8];
 678
 679                close(sp[0]);
 680                xmove_fd(sp[1], 0);
 681                xdup2(0, 1);
 682                /*
 683                 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
 684                 * It prints some debug stuff on stderr, don't know how to suppress it.
 685                 * Work around by dev-nulling stderr. We lose all error messages :(
 686                 */
 687                xmove_fd(2, 3);
 688                xopen("/dev/null", O_RDWR);
 689                memset(&argv, 0, sizeof(argv));
 690                argv[0] = (char*)"openssl";
 691                argv[1] = (char*)"s_client";
 692                argv[2] = (char*)"-quiet";
 693                argv[3] = (char*)"-connect";
 694                argv[4] = (char*)host;
 695                /*
 696                 * Per RFC 6066 Section 3, the only permitted values in the
 697                 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
 698                 * IPv4 and IPv6 addresses, port numbers are not allowed.
 699                 */
 700                if (!is_ip_address(servername)) {
 701                        argv[5] = (char*)"-servername";
 702                        argv[6] = (char*)servername;
 703                }
 704
 705                BB_EXECVP(argv[0], argv);
 706                xmove_fd(3, 2);
 707# if ENABLE_FEATURE_WGET_SSL_HELPER
 708                child_failed = 1;
 709                xfunc_die();
 710# else
 711                bb_perror_msg_and_die("can't execute '%s'", argv[0]);
 712# endif
 713                /* notreached */
 714        }
 715
 716        /* Parent */
 717        free(servername);
 718        free(allocated);
 719        close(sp[1]);
 720# if ENABLE_FEATURE_WGET_SSL_HELPER
 721        if (child_failed) {
 722                close(sp[0]);
 723                return -1;
 724        }
 725# endif
 726        return sp[0];
 727}
 728#endif
 729
 730/* See networking/ssl_helper/README how to build one */
 731#if ENABLE_FEATURE_WGET_SSL_HELPER
 732static void spawn_https_helper_small(int network_fd)
 733{
 734        int sp[2];
 735        int pid;
 736
 737        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
 738                /* Kernel can have AF_UNIX support disabled */
 739                bb_perror_msg_and_die("socketpair");
 740
 741        pid = BB_MMU ? xfork() : xvfork();
 742        if (pid == 0) {
 743                /* Child */
 744                char *argv[3];
 745
 746                close(sp[0]);
 747                xmove_fd(sp[1], 0);
 748                xdup2(0, 1);
 749                xmove_fd(network_fd, 3);
 750                /*
 751                 * A simple ssl/tls helper
 752                 */
 753                argv[0] = (char*)"ssl_helper";
 754                argv[1] = (char*)"-d3";
 755                argv[2] = NULL;
 756                BB_EXECVP(argv[0], argv);
 757                bb_perror_msg_and_die("can't execute '%s'", argv[0]);
 758                /* notreached */
 759        }
 760
 761        /* Parent */
 762        close(sp[1]);
 763        xmove_fd(sp[0], network_fd);
 764}
 765#endif
 766
 767static void NOINLINE retrieve_file_data(FILE *dfp)
 768{
 769#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 770# if ENABLE_FEATURE_WGET_TIMEOUT
 771        unsigned second_cnt = G.timeout_seconds;
 772# endif
 773        struct pollfd polldata;
 774
 775        polldata.fd = fileno(dfp);
 776        polldata.events = POLLIN | POLLPRI;
 777#endif
 778        progress_meter(PROGRESS_START);
 779
 780        if (G.chunked)
 781                goto get_clen;
 782
 783        /* Loops only if chunked */
 784        while (1) {
 785
 786#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 787                /* Must use nonblocking I/O, otherwise fread will loop
 788                 * and *block* until it reads full buffer,
 789                 * which messes up progress bar and/or timeout logic.
 790                 * Because of nonblocking I/O, we need to dance
 791                 * very carefully around EAGAIN. See explanation at
 792                 * clearerr() calls.
 793                 */
 794                ndelay_on(polldata.fd);
 795#endif
 796                while (1) {
 797                        int n;
 798                        unsigned rdsz;
 799
 800#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 801                        /* fread internally uses read loop, which in our case
 802                         * is usually exited when we get EAGAIN.
 803                         * In this case, libc sets error marker on the stream.
 804                         * Need to clear it before next fread to avoid possible
 805                         * rare false positive ferror below. Rare because usually
 806                         * fread gets more than zero bytes, and we don't fall
 807                         * into if (n <= 0) ...
 808                         */
 809                        clearerr(dfp);
 810#endif
 811                        errno = 0;
 812                        rdsz = sizeof(G.wget_buf);
 813                        if (G.got_clen) {
 814                                if (G.content_len < (off_t)sizeof(G.wget_buf)) {
 815                                        if ((int)G.content_len <= 0)
 816                                                break;
 817                                        rdsz = (unsigned)G.content_len;
 818                                }
 819                        }
 820                        n = fread(G.wget_buf, 1, rdsz, dfp);
 821
 822                        if (n > 0) {
 823                                xwrite(G.output_fd, G.wget_buf, n);
 824#if ENABLE_FEATURE_WGET_STATUSBAR
 825                                G.transferred += n;
 826#endif
 827                                if (G.got_clen) {
 828                                        G.content_len -= n;
 829                                        if (G.content_len == 0)
 830                                                break;
 831                                }
 832#if ENABLE_FEATURE_WGET_TIMEOUT
 833                                second_cnt = G.timeout_seconds;
 834#endif
 835                                goto bump;
 836                        }
 837
 838                        /* n <= 0.
 839                         * man fread:
 840                         * If error occurs, or EOF is reached, the return value
 841                         * is a short item count (or zero).
 842                         * fread does not distinguish between EOF and error.
 843                         */
 844                        if (errno != EAGAIN) {
 845                                if (ferror(dfp)) {
 846                                        progress_meter(PROGRESS_END);
 847                                        bb_perror_msg_and_die(bb_msg_read_error);
 848                                }
 849                                break; /* EOF, not error */
 850                        }
 851
 852#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 853                        /* It was EAGAIN. There is no data. Wait up to one second
 854                         * then abort if timed out, or update the bar and try reading again.
 855                         */
 856                        if (safe_poll(&polldata, 1, 1000) == 0) {
 857# if ENABLE_FEATURE_WGET_TIMEOUT
 858                                if (second_cnt != 0 && --second_cnt == 0) {
 859                                        progress_meter(PROGRESS_END);
 860                                        bb_error_msg_and_die("download timed out");
 861                                }
 862# endif
 863                                /* We used to loop back to poll here,
 864                                 * but there is no great harm in letting fread
 865                                 * to try reading anyway.
 866                                 */
 867                        }
 868#endif
 869 bump:
 870                        /* Need to do it _every_ second for "stalled" indicator
 871                         * to be shown properly.
 872                         */
 873                        progress_meter(PROGRESS_BUMP);
 874                } /* while (reading data) */
 875
 876#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 877                clearerr(dfp);
 878                ndelay_off(polldata.fd); /* else fgets can get very unhappy */
 879#endif
 880                if (!G.chunked)
 881                        break;
 882
 883                fgets_and_trim(dfp); /* Eat empty line */
 884 get_clen:
 885                fgets_and_trim(dfp);
 886                G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
 887                /* FIXME: error check? */
 888                if (G.content_len == 0)
 889                        break; /* all done! */
 890                G.got_clen = 1;
 891                /*
 892                 * Note that fgets may result in some data being buffered in dfp.
 893                 * We loop back to fread, which will retrieve this data.
 894                 * Also note that code has to be arranged so that fread
 895                 * is done _before_ one-second poll wait - poll doesn't know
 896                 * about stdio buffering and can result in spurious one second waits!
 897                 */
 898        }
 899
 900        /* If -c failed, we restart from the beginning,
 901         * but we do not truncate file then, we do it only now, at the end.
 902         * This lets user to ^C if his 99% complete 10 GB file download
 903         * failed to restart *without* losing the almost complete file.
 904         */
 905        {
 906                off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
 907                if (pos != (off_t)-1)
 908                        ftruncate(G.output_fd, pos);
 909        }
 910
 911        /* Draw full bar and free its resources */
 912        G.chunked = 0;  /* makes it show 100% even for chunked download */
 913        G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
 914        progress_meter(PROGRESS_END);
 915}
 916
 917static void download_one_url(const char *url)
 918{
 919        bool use_proxy;                 /* Use proxies if env vars are set  */
 920        int redir_limit;
 921        len_and_sockaddr *lsa;
 922        FILE *sfp;                      /* socket to web/ftp server         */
 923        FILE *dfp;                      /* socket to ftp server (data)      */
 924        char *proxy = NULL;
 925        char *fname_out_alloc;
 926        char *redirected_path = NULL;
 927        struct host_info server;
 928        struct host_info target;
 929
 930        server.allocated = NULL;
 931        target.allocated = NULL;
 932        server.user = NULL;
 933        target.user = NULL;
 934
 935        parse_url(url, &target);
 936
 937        /* Use the proxy if necessary */
 938        use_proxy = (strcmp(G.proxy_flag, "off") != 0);
 939        if (use_proxy) {
 940                proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
 941//FIXME: what if protocol is https? Ok to use http_proxy?
 942                use_proxy = (proxy && proxy[0]);
 943                if (use_proxy)
 944                        parse_url(proxy, &server);
 945        }
 946        if (!use_proxy) {
 947                server.port = target.port;
 948                if (ENABLE_FEATURE_IPV6) {
 949                        //free(server.allocated); - can't be non-NULL
 950                        server.host = server.allocated = xstrdup(target.host);
 951                } else {
 952                        server.host = target.host;
 953                }
 954        }
 955
 956        if (ENABLE_FEATURE_IPV6)
 957                strip_ipv6_scope_id(target.host);
 958
 959        /* If there was no -O FILE, guess output filename */
 960        fname_out_alloc = NULL;
 961        if (!(option_mask32 & WGET_OPT_OUTNAME)) {
 962                G.fname_out = bb_get_last_path_component_nostrip(target.path);
 963                /* handle "wget http://kernel.org//" */
 964                if (G.fname_out[0] == '/' || !G.fname_out[0])
 965                        G.fname_out = (char*)"index.html";
 966                /* -P DIR is considered only if there was no -O FILE */
 967                if (G.dir_prefix)
 968                        G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
 969                else {
 970                        /* redirects may free target.path later, need to make a copy */
 971                        G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
 972                }
 973        }
 974#if ENABLE_FEATURE_WGET_STATUSBAR
 975        G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
 976#endif
 977
 978        /* Determine where to start transfer */
 979        G.beg_range = 0;
 980        if (option_mask32 & WGET_OPT_CONTINUE) {
 981                G.output_fd = open(G.fname_out, O_WRONLY);
 982                if (G.output_fd >= 0) {
 983                        G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
 984                }
 985                /* File doesn't exist. We do not create file here yet.
 986                 * We are not sure it exists on remote side */
 987        }
 988
 989        redir_limit = 5;
 990 resolve_lsa:
 991        lsa = xhost2sockaddr(server.host, server.port);
 992        if (!(option_mask32 & WGET_OPT_QUIET)) {
 993                char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
 994                fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
 995                free(s);
 996        }
 997 establish_session:
 998        /*G.content_len = 0; - redundant, got_clen = 0 is enough */
 999        G.got_clen = 0;
1000        G.chunked = 0;
1001        if (use_proxy || target.protocol != P_FTP) {
1002                /*
1003                 *  HTTP session
1004                 */
1005                char *str;
1006                int status;
1007
1008                /* Open socket to http(s) server */
1009#if ENABLE_FEATURE_WGET_OPENSSL
1010                /* openssl (and maybe ssl_helper) support is configured */
1011                if (target.protocol == P_HTTPS) {
1012                        /* openssl-based helper
1013                         * Inconvenient API since we can't give it an open fd
1014                         */
1015                        int fd = spawn_https_helper_openssl(server.host, server.port);
1016# if ENABLE_FEATURE_WGET_SSL_HELPER
1017                        if (fd < 0) { /* no openssl? try ssl_helper */
1018                                sfp = open_socket(lsa);
1019                                spawn_https_helper_small(fileno(sfp));
1020                                goto socket_opened;
1021                        }
1022# else
1023                        /* We don't check for exec("openssl") failure in this case */
1024# endif
1025                        sfp = fdopen(fd, "r+");
1026                        if (!sfp)
1027                                bb_perror_msg_and_die(bb_msg_memory_exhausted);
1028                        goto socket_opened;
1029                }
1030                sfp = open_socket(lsa);
1031 socket_opened:
1032#elif ENABLE_FEATURE_WGET_SSL_HELPER
1033                /* Only ssl_helper support is configured */
1034                sfp = open_socket(lsa);
1035                if (target.protocol == P_HTTPS)
1036                        spawn_https_helper_small(fileno(sfp));
1037#else
1038                /* ssl (https) support is not configured */
1039                sfp = open_socket(lsa);
1040#endif
1041                /* Send HTTP request */
1042                if (use_proxy) {
1043                        SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
1044                                target.protocol, target.host,
1045                                target.path);
1046                } else {
1047                        SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
1048                                (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
1049                                target.path);
1050                }
1051                if (!USR_HEADER_HOST)
1052                        SENDFMT(sfp, "Host: %s\r\n", target.host);
1053                if (!USR_HEADER_USER_AGENT)
1054                        SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
1055
1056                /* Ask server to close the connection as soon as we are done
1057                 * (IOW: we do not intend to send more requests)
1058                 */
1059                SENDFMT(sfp, "Connection: close\r\n");
1060
1061#if ENABLE_FEATURE_WGET_AUTHENTICATION
1062                if (target.user && !USR_HEADER_AUTH) {
1063                        SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
1064                                base64enc(target.user));
1065                }
1066                if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1067                        SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
1068                                base64enc(server.user));
1069                }
1070#endif
1071
1072                if (G.beg_range != 0 && !USR_HEADER_RANGE)
1073                        SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
1074
1075#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1076                if (G.extra_headers) {
1077                        log_io(G.extra_headers);
1078                        fputs(G.extra_headers, sfp);
1079                }
1080
1081                if (option_mask32 & WGET_OPT_POST_DATA) {
1082                        SENDFMT(sfp,
1083                                "Content-Type: application/x-www-form-urlencoded\r\n"
1084                                "Content-Length: %u\r\n"
1085                                "\r\n"
1086                                "%s",
1087                                (int) strlen(G.post_data), G.post_data
1088                        );
1089                } else
1090#endif
1091                {
1092                        SENDFMT(sfp, "\r\n");
1093                }
1094
1095                fflush(sfp);
1096                /* If we use SSL helper, keeping our end of the socket open for writing
1097                 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1098                 * even after child closes its copy of the fd.
1099                 * This helps:
1100                 */
1101                shutdown(fileno(sfp), SHUT_WR);
1102
1103                /*
1104                 * Retrieve HTTP response line and check for "200" status code.
1105                 */
1106 read_response:
1107                fgets_and_trim(sfp);
1108
1109                str = G.wget_buf;
1110                str = skip_non_whitespace(str);
1111                str = skip_whitespace(str);
1112                // FIXME: no error check
1113                // xatou wouldn't work: "200 OK"
1114                status = atoi(str);
1115                switch (status) {
1116                case 0:
1117                case 100:
1118                        while (gethdr(sfp) != NULL)
1119                                /* eat all remaining headers */;
1120                        goto read_response;
1121
1122                /* Success responses */
1123                case 200:
1124                        /* fall through */
1125                case 201: /* 201 Created */
1126/* "The request has been fulfilled and resulted in a new resource being created" */
1127                        /* Standard wget is reported to treat this as success */
1128                        /* fall through */
1129                case 202: /* 202 Accepted */
1130/* "The request has been accepted for processing, but the processing has not been completed" */
1131                        /* Treat as success: fall through */
1132                case 203: /* 203 Non-Authoritative Information */
1133/* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1134                        /* fall through */
1135                case 204: /* 204 No Content */
1136/*
1137Response 204 doesn't say "null file", it says "metadata
1138has changed but data didn't":
1139
1140"10.2.5 204 No Content
1141The server has fulfilled the request but does not need to return
1142an entity-body, and might want to return updated metainformation.
1143The response MAY include new or updated metainformation in the form
1144of entity-headers, which if present SHOULD be associated with
1145the requested variant.
1146
1147If the client is a user agent, it SHOULD NOT change its document
1148view from that which caused the request to be sent. This response
1149is primarily intended to allow input for actions to take place
1150without causing a change to the user agent's active document view,
1151although any new or updated metainformation SHOULD be applied
1152to the document currently in the user agent's active view.
1153
1154The 204 response MUST NOT include a message-body, and thus
1155is always terminated by the first empty line after the header fields."
1156
1157However, in real world it was observed that some web servers
1158(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1159*/
1160                        if (G.beg_range != 0) {
1161                                /* "Range:..." was not honored by the server.
1162                                 * Restart download from the beginning.
1163                                 */
1164                                reset_beg_range_to_zero();
1165                        }
1166                        break;
1167                /* 205 Reset Content ?? what to do on this ??   */
1168
1169                case 300:  /* redirection */
1170                case 301:
1171                case 302:
1172                case 303:
1173                        break;
1174
1175                case 206: /* Partial Content */
1176                        if (G.beg_range != 0)
1177                                /* "Range:..." worked. Good. */
1178                                break;
1179                        /* Partial Content even though we did not ask for it??? */
1180                        /* fall through */
1181                default:
1182                        bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
1183                }
1184
1185                /*
1186                 * Retrieve HTTP headers.
1187                 */
1188                while ((str = gethdr(sfp)) != NULL) {
1189                        static const char keywords[] ALIGN1 =
1190                                "content-length\0""transfer-encoding\0""location\0";
1191                        enum {
1192                                KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1193                        };
1194                        smalluint key;
1195
1196                        /* gethdr converted "FOO:" string to lowercase */
1197
1198                        /* strip trailing whitespace */
1199                        char *s = strchrnul(str, '\0') - 1;
1200                        while (s >= str && (*s == ' ' || *s == '\t')) {
1201                                *s = '\0';
1202                                s--;
1203                        }
1204                        key = index_in_strings(keywords, G.wget_buf) + 1;
1205                        if (key == KEY_content_length) {
1206                                G.content_len = BB_STRTOOFF(str, NULL, 10);
1207                                if (G.content_len < 0 || errno) {
1208                                        bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
1209                                }
1210                                G.got_clen = 1;
1211                                continue;
1212                        }
1213                        if (key == KEY_transfer_encoding) {
1214                                if (strcmp(str_tolower(str), "chunked") != 0)
1215                                        bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
1216                                G.chunked = 1;
1217                        }
1218                        if (key == KEY_location && status >= 300) {
1219                                if (--redir_limit == 0)
1220                                        bb_error_msg_and_die("too many redirections");
1221                                fclose(sfp);
1222                                if (str[0] == '/') {
1223                                        free(redirected_path);
1224                                        target.path = redirected_path = xstrdup(str+1);
1225                                        /* lsa stays the same: it's on the same server */
1226                                } else {
1227                                        parse_url(str, &target);
1228                                        if (!use_proxy) {
1229                                                /* server.user remains untouched */
1230                                                free(server.allocated);
1231                                                server.allocated = NULL;
1232                                                server.host = target.host;
1233                                                /* strip_ipv6_scope_id(target.host); - no! */
1234                                                /* we assume remote never gives us IPv6 addr with scope id */
1235                                                server.port = target.port;
1236                                                free(lsa);
1237                                                goto resolve_lsa;
1238                                        } /* else: lsa stays the same: we use proxy */
1239                                }
1240                                goto establish_session;
1241                        }
1242                }
1243//              if (status >= 300)
1244//                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
1245
1246                /* For HTTP, data is pumped over the same connection */
1247                dfp = sfp;
1248        } else {
1249                /*
1250                 *  FTP session
1251                 */
1252                sfp = prepare_ftp_session(&dfp, &target, lsa);
1253        }
1254
1255        free(lsa);
1256
1257        if (!(option_mask32 & WGET_OPT_SPIDER)) {
1258                if (G.output_fd < 0)
1259                        G.output_fd = xopen(G.fname_out, G.o_flags);
1260                retrieve_file_data(dfp);
1261                if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1262                        xclose(G.output_fd);
1263                        G.output_fd = -1;
1264                }
1265        }
1266
1267        if (dfp != sfp) {
1268                /* It's ftp. Close data connection properly */
1269                fclose(dfp);
1270                if (ftpcmd(NULL, NULL, sfp) != 226)
1271                        bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1272                /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1273        }
1274        fclose(sfp);
1275
1276        free(server.allocated);
1277        free(target.allocated);
1278        free(server.user);
1279        free(target.user);
1280        free(fname_out_alloc);
1281        free(redirected_path);
1282}
1283
1284int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1285int wget_main(int argc UNUSED_PARAM, char **argv)
1286{
1287#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1288        static const char wget_longopts[] ALIGN1 =
1289                /* name, has_arg, val */
1290                "continue\0"         No_argument       "c"
1291                "quiet\0"            No_argument       "q"
1292                "output-document\0"  Required_argument "O"
1293                "directory-prefix\0" Required_argument "P"
1294                "proxy\0"            Required_argument "Y"
1295                "user-agent\0"       Required_argument "U"
1296IF_FEATURE_WGET_TIMEOUT(
1297                "timeout\0"          Required_argument "T")
1298                /* Ignored: */
1299IF_DESKTOP(     "tries\0"            Required_argument "t")
1300                "header\0"           Required_argument "\xff"
1301                "post-data\0"        Required_argument "\xfe"
1302                "spider\0"           No_argument       "\xfd"
1303                /* Ignored (we always use PASV): */
1304IF_DESKTOP(     "passive-ftp\0"      No_argument       "\xf0")
1305                /* Ignored (we don't do ssl) */
1306IF_DESKTOP(     "no-check-certificate\0" No_argument   "\xf0")
1307                /* Ignored (we don't support caching) */
1308IF_DESKTOP(     "no-cache\0"         No_argument       "\xf0")
1309IF_DESKTOP(     "no-verbose\0"       No_argument       "\xf0")
1310IF_DESKTOP(     "no-clobber\0"       No_argument       "\xf0")
1311IF_DESKTOP(     "no-host-directories\0" No_argument    "\xf0")
1312IF_DESKTOP(     "no-parent\0"        No_argument       "\xf0")
1313                ;
1314#endif
1315
1316#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1317        llist_t *headers_llist = NULL;
1318#endif
1319
1320        INIT_G();
1321
1322#if ENABLE_FEATURE_WGET_TIMEOUT
1323        G.timeout_seconds = 900;
1324        signal(SIGALRM, alarm_handler);
1325#endif
1326        G.proxy_flag = "on";   /* use proxies if env vars are set */
1327        G.user_agent = "Wget"; /* "User-Agent" header field */
1328
1329#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1330        applet_long_options = wget_longopts;
1331#endif
1332        opt_complementary = "-1" /* at least one URL */
1333                IF_FEATURE_WGET_LONG_OPTIONS(":\xff::"); /* --header is a list */
1334        getopt32(argv, "cqO:P:Y:U:T:+"
1335                /*ignored:*/ "t:"
1336                /*ignored:*/ "n::"
1337                /* wget has exactly four -n<letter> opts, all of which we can ignore:
1338                 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1339                 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1340                 * -nH --no-host-directories: wget -r http://host/ won't create host/
1341                 * -np --no-parent
1342                 * "n::" above says that we accept -n[ARG].
1343                 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1344                 */
1345                , &G.fname_out, &G.dir_prefix,
1346                &G.proxy_flag, &G.user_agent,
1347                IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1348                NULL, /* -t RETRIES */
1349                NULL  /* -n[ARG] */
1350                IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1351                IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1352        );
1353#if 0 /* option bits debug */
1354        if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1355        if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1356        if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1357        if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1358        if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
1359        exit(0);
1360#endif
1361        argv += optind;
1362
1363#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1364        if (headers_llist) {
1365                int size = 0;
1366                char *hdr;
1367                llist_t *ll = headers_llist;
1368                while (ll) {
1369                        size += strlen(ll->data) + 2;
1370                        ll = ll->link;
1371                }
1372                G.extra_headers = hdr = xmalloc(size + 1);
1373                while (headers_llist) {
1374                        int bit;
1375                        const char *words;
1376
1377                        size = sprintf(hdr, "%s\r\n",
1378                                        (char*)llist_pop(&headers_llist));
1379                        /* a bit like index_in_substrings but don't match full key */
1380                        bit = 1;
1381                        words = wget_user_headers;
1382                        while (*words) {
1383                                if (strstr(hdr, words) == hdr) {
1384                                        G.user_headers |= bit;
1385                                        break;
1386                                }
1387                                bit <<= 1;
1388                                words += strlen(words) + 1;
1389                        }
1390                        hdr += size;
1391                }
1392        }
1393#endif
1394
1395        G.output_fd = -1;
1396        G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1397        if (G.fname_out) { /* -O FILE ? */
1398                if (LONE_DASH(G.fname_out)) { /* -O - ? */
1399                        G.output_fd = 1;
1400                        option_mask32 &= ~WGET_OPT_CONTINUE;
1401                }
1402                /* compat with wget: -O FILE can overwrite */
1403                G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1404        }
1405
1406        while (*argv)
1407                download_one_url(*argv++);
1408
1409        if (G.output_fd >= 0)
1410                xclose(G.output_fd);
1411
1412#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1413        free(G.extra_headers);
1414#endif
1415        FINI_G();
1416
1417        return EXIT_SUCCESS;
1418}
1419