busybox/networking/wget.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * wget - retrieve a file using HTTP or FTP
   4 *
   5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
   6 * Licensed under GPLv2, see file LICENSE in this source tree.
   7 *
   8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
   9 * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
  10 */
  11#include "libbb.h"
  12
   /* Components of one URL, as split up by parse_url(). */
   struct host_info {
           // May be used if we ever will want to free() all xstrdup()s...
           /* char *allocated; */
           const char *path;   /* what follows the host part: the text after the first '/',
                                * or the text starting at '?' or '#' if that comes first;
                                * "" when the URL has none of these */
           const char *user;   /* text before the last '@' of the host part;
                                * parse_url() leaves it untouched when there is no '@' */
           char       *host;   /* host part with any "user@" prefix cut off */
           int         port;   /* parse_url() stores the result of bb_lookup_port() here
                                * (called with 80 for "http", 21 for "ftp") */
           smallint    is_ftp; /* 1 for "ftp://" URLs, 0 for "http://" URLs */
   };
  22
  23
   /* Globals */
   /* Per-run state. It is not a real variable: the G macro below overlays
    * this struct on bb_common_bufsiz1. */
   struct globals {
           off_t content_len;        /* Content-length of the file */
           off_t beg_range;          /* Range at which continue begins */
   #if ENABLE_FEATURE_WGET_STATUSBAR
           off_t transferred;        /* Number of bytes transferred so far */
           const char *curfile;      /* Name of current file being transferred */
           bb_progress_t pmt;        /* state passed to bb_progress_init/bb_progress_update */
   #endif
   #if ENABLE_FEATURE_WGET_TIMEOUT
           unsigned timeout_seconds; /* how many consecutive 1000 ms poll waits without data
                                      * retrieve_file_data() tolerates; 0 means no limit */
   #endif
           smallint chunked;         /* chunked transfer encoding */
           smallint got_clen;        /* got content-length: from server  */
   } FIX_ALIASING;
   /* Access bb_common_bufsiz1 as if it were a struct globals */
   #define G (*(struct globals*)&bb_common_bufsiz1)
   /* Compile-time size check: the array size becomes -1 (a compile error)
    * when struct globals does not fit into COMMON_BUFSIZE bytes */
   struct BUG_G_too_big {
           char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
   };
   /* Set the non-zero defaults: G.timeout_seconds starts at 900.
    * NOTE(review): IF_FEATURE_WGET_TIMEOUT() presumably expands to nothing
    * when the timeout feature is disabled - confirm. */
   #define INIT_G() do { \
           IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
   } while (0)
  46
  47
   /* Must match option string! */
   /* Bits 0..8 follow the order of the letters in the getopt32() string
    * "csqO:P:Y:U:T:" "t:" used by wget_main().
    * NOTE(review): bits 9..11 presumably correspond to the long-only options
    * "\xff" (passive-ftp), "\xfe" (header) and "\xfd" (post-data) - confirm
    * against getopt32(). */
   enum {
           WGET_OPT_CONTINUE   = (1 << 0),
           WGET_OPT_SPIDER     = (1 << 1),
           WGET_OPT_QUIET      = (1 << 2),
           WGET_OPT_OUTNAME    = (1 << 3),
           WGET_OPT_PREFIX     = (1 << 4),
           WGET_OPT_PROXY      = (1 << 5),
           WGET_OPT_USER_AGENT = (1 << 6),
           WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
           WGET_OPT_RETRIES    = (1 << 8),
           WGET_OPT_PASSIVE    = (1 << 9),
           /* The two masks below become 0 when ENABLE_FEATURE_WGET_LONG_OPTIONS is 0,
            * so tests such as (opt & WGET_OPT_POST_DATA) are then always false */
           WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
           WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
   };
  63
  64enum {
  65        PROGRESS_START = -1,
  66        PROGRESS_END   = 0,
  67        PROGRESS_BUMP  = 1,
  68};
  69#if ENABLE_FEATURE_WGET_STATUSBAR
  70static void progress_meter(int flag)
  71{
  72        if (option_mask32 & WGET_OPT_QUIET)
  73                return;
  74
  75        if (flag == PROGRESS_START)
  76                bb_progress_init(&G.pmt);
  77
  78        bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred,
  79                           G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
  80
  81        if (flag == PROGRESS_END) {
  82                bb_putchar_stderr('\n');
  83                G.transferred = 0;
  84        }
  85}
  86#else
  87static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
  88#endif
  89
  90
  91/* IPv6 knows scoped address types i.e. link and site local addresses. Link
  92 * local addresses can have a scope identifier to specify the
  93 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
  94 * identifier is only valid on a single node.
  95 *
  96 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
  97 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
  98 * in the Host header as invalid requests, see
  99 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 100 */
 101static void strip_ipv6_scope_id(char *host)
 102{
 103        char *scope, *cp;
 104
 105        /* bbox wget actually handles IPv6 addresses without [], like
 106         * wget "http://::1/xxx", but this is not standard.
 107         * To save code, _here_ we do not support it. */
 108
 109        if (host[0] != '[')
 110                return; /* not IPv6 */
 111
 112        scope = strchr(host, '%');
 113        if (!scope)
 114                return;
 115
 116        /* Remove the IPv6 zone identifier from the host address */
 117        cp = strchr(host, ']');
 118        if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
 119                /* malformed address (not "[xx]:nn" or "[xx]") */
 120                return;
 121        }
 122
 123        /* cp points to "]...", scope points to "%eth0]..." */
 124        overlapping_strcpy(scope, cp);
 125}
 126
 127/* Read NMEMB bytes into PTR from STREAM.  Returns the number of bytes read,
 128 * and a short count if an eof or non-interrupt error is encountered.  */
 129static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
 130{
 131        size_t ret;
 132        char *p = (char*)ptr;
 133
 134        do {
 135                clearerr(stream);
 136                errno = 0;
 137                ret = fread(p, 1, nmemb, stream);
 138                p += ret;
 139                nmemb -= ret;
 140        } while (nmemb && ferror(stream) && errno == EINTR);
 141
 142        return p - (char*)ptr;
 143}
 144
 145/* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM.
 146 * Returns S, or NULL if an eof or non-interrupt error is encountered.  */
 147static char *safe_fgets(char *s, int size, FILE *stream)
 148{
 149        char *ret;
 150
 151        do {
 152                clearerr(stream);
 153                errno = 0;
 154                ret = fgets(s, size, stream);
 155        } while (ret == NULL && ferror(stream) && errno == EINTR);
 156
 157        return ret;
 158}
 159
 160#if ENABLE_FEATURE_WGET_AUTHENTICATION
 161/* Base64-encode character string. buf is assumed to be char buf[512]. */
 162static char *base64enc_512(char buf[512], const char *str)
 163{
 164        unsigned len = strlen(str);
 165        if (len > 512/4*3 - 10) /* paranoia */
 166                len = 512/4*3 - 10;
 167        bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);
 168        return buf;
 169}
 170#endif
 171
 172static char* sanitize_string(char *s)
 173{
 174        unsigned char *p = (void *) s;
 175        while (*p >= ' ')
 176                p++;
 177        *p = '\0';
 178        return s;
 179}
 180
 181static FILE *open_socket(len_and_sockaddr *lsa)
 182{
 183        FILE *fp;
 184
 185        /* glibc 2.4 seems to try seeking on it - ??! */
 186        /* hopefully it understands what ESPIPE means... */
 187        fp = fdopen(xconnect_stream(lsa), "r+");
 188        if (fp == NULL)
 189                bb_perror_msg_and_die("fdopen");
 190
 191        return fp;
 192}
 193
 194static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
 195{
 196        int result;
 197        if (s1) {
 198                if (!s2) s2 = "";
 199                fprintf(fp, "%s%s\r\n", s1, s2);
 200                fflush(fp);
 201        }
 202
 203        do {
 204                char *buf_ptr;
 205
 206                if (fgets(buf, 510, fp) == NULL) {
 207                        bb_perror_msg_and_die("error getting response");
 208                }
 209                buf_ptr = strstr(buf, "\r\n");
 210                if (buf_ptr) {
 211                        *buf_ptr = '\0';
 212                }
 213        } while (!isdigit(buf[0]) || buf[3] != ' ');
 214
 215        buf[3] = '\0';
 216        result = xatoi_positive(buf);
 217        buf[3] = ' ';
 218        return result;
 219}
 220
 221static void parse_url(char *src_url, struct host_info *h)
 222{
 223        char *url, *p, *sp;
 224
 225        /* h->allocated = */ url = xstrdup(src_url);
 226
 227        if (strncmp(url, "http://", 7) == 0) {
 228                h->port = bb_lookup_port("http", "tcp", 80);
 229                h->host = url + 7;
 230                h->is_ftp = 0;
 231        } else if (strncmp(url, "ftp://", 6) == 0) {
 232                h->port = bb_lookup_port("ftp", "tcp", 21);
 233                h->host = url + 6;
 234                h->is_ftp = 1;
 235        } else
 236                bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
 237
 238        // FYI:
 239        // "Real" wget 'http://busybox.net?var=a/b' sends this request:
 240        //   'GET /?var=a/b HTTP 1.0'
 241        //   and saves 'index.html?var=a%2Fb' (we save 'b')
 242        // wget 'http://busybox.net?login=john@doe':
 243        //   request: 'GET /?login=john@doe HTTP/1.0'
 244        //   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
 245        // wget 'http://busybox.net#test/test':
 246        //   request: 'GET / HTTP/1.0'
 247        //   saves: 'index.html' (we save 'test')
 248        //
 249        // We also don't add unique .N suffix if file exists...
 250        sp = strchr(h->host, '/');
 251        p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
 252        p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
 253        if (!sp) {
 254                h->path = "";
 255        } else if (*sp == '/') {
 256                *sp = '\0';
 257                h->path = sp + 1;
 258        } else { // '#' or '?'
 259                // http://busybox.net?login=john@doe is a valid URL
 260                // memmove converts to:
 261                // http:/busybox.nett?login=john@doe...
 262                memmove(h->host - 1, h->host, sp - h->host);
 263                h->host--;
 264                sp[-1] = '\0';
 265                h->path = sp;
 266        }
 267
 268        // We used to set h->user to NULL here, but this interferes
 269        // with handling of code 302 ("object was moved")
 270
 271        sp = strrchr(h->host, '@');
 272        if (sp != NULL) {
 273                h->user = h->host;
 274                *sp = '\0';
 275                h->host = sp + 1;
 276        }
 277
 278        sp = h->host;
 279}
 280
 281static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
 282{
 283        char *s, *hdrval;
 284        int c;
 285
 286        /* *istrunc = 0; */
 287
 288        /* retrieve header line */
 289        if (fgets(buf, bufsiz, fp) == NULL)
 290                return NULL;
 291
 292        /* see if we are at the end of the headers */
 293        for (s = buf; *s == '\r'; ++s)
 294                continue;
 295        if (*s == '\n')
 296                return NULL;
 297
 298        /* convert the header name to lower case */
 299        for (s = buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
 300                /* tolower for "A-Z", no-op for "0-9a-z-." */
 301                *s = (*s | 0x20);
 302        }
 303
 304        /* verify we are at the end of the header name */
 305        if (*s != ':')
 306                bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));
 307
 308        /* locate the start of the header value */
 309        *s++ = '\0';
 310        hdrval = skip_whitespace(s);
 311
 312        /* locate the end of header */
 313        while (*s && *s != '\r' && *s != '\n')
 314                ++s;
 315
 316        /* end of header found */
 317        if (*s) {
 318                *s = '\0';
 319                return hdrval;
 320        }
 321
 322        /* Rats! The buffer isn't big enough to hold the entire header value */
 323        while (c = getc(fp), c != EOF && c != '\n')
 324                continue;
 325        /* *istrunc = 1; */
 326        return hdrval;
 327}
 328
 329#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 330static char *URL_escape(const char *str)
 331{
 332        /* URL encode, see RFC 2396 */
 333        char *dst;
 334        char *res = dst = xmalloc(strlen(str) * 3 + 1);
 335        unsigned char c;
 336
 337        while (1) {
 338                c = *str++;
 339                if (c == '\0'
 340                /* || strchr("!&'()*-.=_~", c) - more code */
 341                 || c == '!'
 342                 || c == '&'
 343                 || c == '\''
 344                 || c == '('
 345                 || c == ')'
 346                 || c == '*'
 347                 || c == '-'
 348                 || c == '.'
 349                 || c == '='
 350                 || c == '_'
 351                 || c == '~'
 352                 || (c >= '0' && c <= '9')
 353                 || ((c|0x20) >= 'a' && (c|0x20) <= 'z')
 354                ) {
 355                        *dst++ = c;
 356                        if (c == '\0')
 357                                return res;
 358                } else {
 359                        *dst++ = '%';
 360                        *dst++ = bb_hexdigits_upcase[c >> 4];
 361                        *dst++ = bb_hexdigits_upcase[c & 0xf];
 362                }
 363        }
 364}
 365#endif
 366
 367static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
 368{
 369        char buf[512];
 370        FILE *sfp;
 371        char *str;
 372        int port;
 373
 374        if (!target->user)
 375                target->user = xstrdup("anonymous:busybox@");
 376
 377        sfp = open_socket(lsa);
 378        if (ftpcmd(NULL, NULL, sfp, buf) != 220)
 379                bb_error_msg_and_die("%s", sanitize_string(buf+4));
 380
 381        /*
 382         * Splitting username:password pair,
 383         * trying to log in
 384         */
 385        str = strchr(target->user, ':');
 386        if (str)
 387                *str++ = '\0';
 388        switch (ftpcmd("USER ", target->user, sfp, buf)) {
 389        case 230:
 390                break;
 391        case 331:
 392                if (ftpcmd("PASS ", str, sfp, buf) == 230)
 393                        break;
 394                /* fall through (failed login) */
 395        default:
 396                bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
 397        }
 398
 399        ftpcmd("TYPE I", NULL, sfp, buf);
 400
 401        /*
 402         * Querying file size
 403         */
 404        if (ftpcmd("SIZE ", target->path, sfp, buf) == 213) {
 405                G.content_len = BB_STRTOOFF(buf+4, NULL, 10);
 406                if (G.content_len < 0 || errno) {
 407                        bb_error_msg_and_die("SIZE value is garbage");
 408                }
 409                G.got_clen = 1;
 410        }
 411
 412        /*
 413         * Entering passive mode
 414         */
 415        if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
 416 pasv_error:
 417                bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
 418        }
 419        // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
 420        // Server's IP is N1.N2.N3.N4 (we ignore it)
 421        // Server's port for data connection is P1*256+P2
 422        str = strrchr(buf, ')');
 423        if (str) str[0] = '\0';
 424        str = strrchr(buf, ',');
 425        if (!str) goto pasv_error;
 426        port = xatou_range(str+1, 0, 255);
 427        *str = '\0';
 428        str = strrchr(buf, ',');
 429        if (!str) goto pasv_error;
 430        port += xatou_range(str+1, 0, 255) * 256;
 431        set_nport(lsa, htons(port));
 432
 433        *dfpp = open_socket(lsa);
 434
 435        if (G.beg_range) {
 436                sprintf(buf, "REST %"OFF_FMT"u", G.beg_range);
 437                if (ftpcmd(buf, NULL, sfp, buf) == 350)
 438                        G.content_len -= G.beg_range;
 439        }
 440
 441        if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
 442                bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
 443
 444        return sfp;
 445}
 446
 447static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
 448{
 449        char buf[4*1024]; /* made bigger to speed up local xfers */
 450#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 451# if ENABLE_FEATURE_WGET_TIMEOUT
 452        unsigned second_cnt;
 453# endif
 454        struct pollfd polldata;
 455
 456        polldata.fd = fileno(dfp);
 457        polldata.events = POLLIN | POLLPRI;
 458#endif
 459        progress_meter(PROGRESS_START);
 460
 461        if (G.chunked)
 462                goto get_clen;
 463
 464        /* Loops only if chunked */
 465        while (1) {
 466
 467#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 468                ndelay_on(polldata.fd);
 469#endif
 470                while (1) {
 471                        int n;
 472                        unsigned rdsz;
 473
 474                        rdsz = sizeof(buf);
 475                        if (G.got_clen) {
 476                                if (G.content_len < (off_t)sizeof(buf)) {
 477                                        if ((int)G.content_len <= 0)
 478                                                break;
 479                                        rdsz = (unsigned)G.content_len;
 480                                }
 481                        }
 482#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 483# if ENABLE_FEATURE_WGET_TIMEOUT
 484                        second_cnt = G.timeout_seconds;
 485# endif
 486                        while (1) {
 487                                if (safe_poll(&polldata, 1, 1000) != 0)
 488                                        break; /* error, EOF, or data is available */
 489# if ENABLE_FEATURE_WGET_TIMEOUT
 490                                if (second_cnt != 0 && --second_cnt == 0) {
 491                                        progress_meter(PROGRESS_END);
 492                                        bb_perror_msg_and_die("download timed out");
 493                                }
 494# endif
 495                                /* Needed for "stalled" indicator */
 496                                progress_meter(PROGRESS_BUMP);
 497                        }
 498#endif
 499                        /* fread internally uses read loop, which in our case
 500                         * is usually exited when we get EAGAIN.
 501                         * In this case, libc sets error marker on the stream.
 502                         * Need to clear it before next fread to avoid possible
 503                         * rare false positive ferror below. Rare because usually
 504                         * fread gets more than zero bytes, and we don't fall
 505                         * into if (n <= 0) ...
 506                         */
 507                        clearerr(dfp);
 508                        errno = 0;
 509                        n = safe_fread(buf, rdsz, dfp);
 510                        /* man fread:
 511                         * If error occurs, or EOF is reached, the return value
 512                         * is a short item count (or zero).
 513                         * fread does not distinguish between EOF and error.
 514                         */
 515                        if (n <= 0) {
 516#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 517                                if (errno == EAGAIN) /* poll lied, there is no data? */
 518                                        continue; /* yes */
 519#endif
 520                                if (ferror(dfp))
 521                                        bb_perror_msg_and_die(bb_msg_read_error);
 522                                break; /* EOF, not error */
 523                        }
 524
 525                        xwrite(output_fd, buf, n);
 526#if ENABLE_FEATURE_WGET_STATUSBAR
 527                        G.transferred += n;
 528                        progress_meter(PROGRESS_BUMP);
 529#endif
 530                        if (G.got_clen) {
 531                                G.content_len -= n;
 532                                if (G.content_len == 0)
 533                                        break;
 534                        }
 535                }
 536#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 537                ndelay_off(polldata.fd);
 538#endif
 539
 540                if (!G.chunked)
 541                        break;
 542
 543                safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
 544 get_clen:
 545                safe_fgets(buf, sizeof(buf), dfp);
 546                G.content_len = STRTOOFF(buf, NULL, 16);
 547                /* FIXME: error check? */
 548                if (G.content_len == 0)
 549                        break; /* all done! */
 550                G.got_clen = 1;
 551        }
 552
 553        progress_meter(PROGRESS_END);
 554}
 555
 556int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
 557int wget_main(int argc UNUSED_PARAM, char **argv)
 558{
 559        char buf[512];
 560        struct host_info server, target;
 561        len_and_sockaddr *lsa;
 562        unsigned opt;
 563        int redir_limit;
 564        char *proxy = NULL;
 565        char *dir_prefix = NULL;
 566#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 567        char *post_data;
 568        char *extra_headers = NULL;
 569        llist_t *headers_llist = NULL;
 570#endif
 571        FILE *sfp;                      /* socket to web/ftp server         */
 572        FILE *dfp;                      /* socket to ftp server (data)      */
 573        char *fname_out;                /* where to direct output (-O)      */
 574        int output_fd = -1;
 575        bool use_proxy;                 /* Use proxies if env vars are set  */
 576        const char *proxy_flag = "on";  /* Use proxies if env vars are set  */
 577        const char *user_agent = "Wget";/* "User-Agent" header field        */
 578
 579        static const char keywords[] ALIGN1 =
 580                "content-length\0""transfer-encoding\0""chunked\0""location\0";
 581        enum {
 582                KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
 583        };
 584#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 585        static const char wget_longopts[] ALIGN1 =
 586                /* name, has_arg, val */
 587                "continue\0"         No_argument       "c"
 588                "spider\0"           No_argument       "s"
 589                "quiet\0"            No_argument       "q"
 590                "output-document\0"  Required_argument "O"
 591                "directory-prefix\0" Required_argument "P"
 592                "proxy\0"            Required_argument "Y"
 593                "user-agent\0"       Required_argument "U"
 594#if ENABLE_FEATURE_WGET_TIMEOUT
 595                "timeout\0"          Required_argument "T"
 596#endif
 597                /* Ignored: */
 598                // "tries\0"            Required_argument "t"
 599                /* Ignored (we always use PASV): */
 600                "passive-ftp\0"      No_argument       "\xff"
 601                "header\0"           Required_argument "\xfe"
 602                "post-data\0"        Required_argument "\xfd"
 603                /* Ignored (we don't do ssl) */
 604                "no-check-certificate\0" No_argument   "\xfc"
 605                ;
 606#endif
 607
 608        INIT_G();
 609
 610#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 611        applet_long_options = wget_longopts;
 612#endif
 613        /* server.allocated = target.allocated = NULL; */
 614        opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
 615        opt = getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
 616                                &fname_out, &dir_prefix,
 617                                &proxy_flag, &user_agent,
 618                                IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
 619                                NULL /* -t RETRIES */
 620                                IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
 621                                IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
 622                                );
 623#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 624        if (headers_llist) {
 625                int size = 1;
 626                char *cp;
 627                llist_t *ll = headers_llist;
 628                while (ll) {
 629                        size += strlen(ll->data) + 2;
 630                        ll = ll->link;
 631                }
 632                extra_headers = cp = xmalloc(size);
 633                while (headers_llist) {
 634                        cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
 635                }
 636        }
 637#endif
 638
 639        /* TODO: compat issue: should handle "wget URL1 URL2..." */
 640
 641        target.user = NULL;
 642        parse_url(argv[optind], &target);
 643
 644        /* Use the proxy if necessary */
 645        use_proxy = (strcmp(proxy_flag, "off") != 0);
 646        if (use_proxy) {
 647                proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
 648                if (proxy && proxy[0]) {
 649                        server.user = NULL;
 650                        parse_url(proxy, &server);
 651                } else {
 652                        use_proxy = 0;
 653                }
 654        }
 655        if (!use_proxy) {
 656                server.port = target.port;
 657                if (ENABLE_FEATURE_IPV6) {
 658                        server.host = xstrdup(target.host);
 659                } else {
 660                        server.host = target.host;
 661                }
 662        }
 663
 664        if (ENABLE_FEATURE_IPV6)
 665                strip_ipv6_scope_id(target.host);
 666
 667        /* Guess an output filename, if there was no -O FILE */
 668        if (!(opt & WGET_OPT_OUTNAME)) {
 669                fname_out = bb_get_last_path_component_nostrip(target.path);
 670                /* handle "wget http://kernel.org//" */
 671                if (fname_out[0] == '/' || !fname_out[0])
 672                        fname_out = (char*)"index.html";
 673                /* -P DIR is considered only if there was no -O FILE */
 674                if (dir_prefix)
 675                        fname_out = concat_path_file(dir_prefix, fname_out);
 676        } else {
 677                if (LONE_DASH(fname_out)) {
 678                        /* -O - */
 679                        output_fd = 1;
 680                        opt &= ~WGET_OPT_CONTINUE;
 681                }
 682        }
 683#if ENABLE_FEATURE_WGET_STATUSBAR
 684        G.curfile = bb_get_last_path_component_nostrip(fname_out);
 685#endif
 686
 687        /* Impossible?
 688        if ((opt & WGET_OPT_CONTINUE) && !fname_out)
 689                bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
 690        */
 691
 692        /* Determine where to start transfer */
 693        if (opt & WGET_OPT_CONTINUE) {
 694                output_fd = open(fname_out, O_WRONLY);
 695                if (output_fd >= 0) {
 696                        G.beg_range = xlseek(output_fd, 0, SEEK_END);
 697                }
 698                /* File doesn't exist. We do not create file here yet.
 699                 * We are not sure it exists on remove side */
 700        }
 701
 702        redir_limit = 5;
 703 resolve_lsa:
 704        lsa = xhost2sockaddr(server.host, server.port);
 705        if (!(opt & WGET_OPT_QUIET)) {
 706                char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
 707                fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
 708                free(s);
 709        }
 710 establish_session:
 711        if (use_proxy || !target.is_ftp) {
 712                /*
 713                 *  HTTP session
 714                 */
 715                char *str;
 716                int status;
 717
 718                /* Open socket to http server */
 719                sfp = open_socket(lsa);
 720
 721                /* Send HTTP request */
 722                if (use_proxy) {
 723                        fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
 724                                target.is_ftp ? "f" : "ht", target.host,
 725                                target.path);
 726                } else {
 727                        if (opt & WGET_OPT_POST_DATA)
 728                                fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
 729                        else
 730                                fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
 731                }
 732
 733                fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
 734                        target.host, user_agent);
 735
 736                /* Ask server to close the connection as soon as we are done
 737                 * (IOW: we do not intend to send more requests)
 738                 */
 739                fprintf(sfp, "Connection: close\r\n");
 740
 741#if ENABLE_FEATURE_WGET_AUTHENTICATION
 742                if (target.user) {
 743                        fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
 744                                base64enc_512(buf, target.user));
 745                }
 746                if (use_proxy && server.user) {
 747                        fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
 748                                base64enc_512(buf, server.user));
 749                }
 750#endif
 751
 752                if (G.beg_range)
 753                        fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
 754
 755#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 756                if (extra_headers)
 757                        fputs(extra_headers, sfp);
 758
 759                if (opt & WGET_OPT_POST_DATA) {
 760                        char *estr = URL_escape(post_data);
 761                        fprintf(sfp,
 762                                "Content-Type: application/x-www-form-urlencoded\r\n"
 763                                "Content-Length: %u\r\n"
 764                                "\r\n"
 765                                "%s",
 766                                (int) strlen(estr), estr
 767                        );
 768                        free(estr);
 769                } else
 770#endif
 771                {
 772                        fprintf(sfp, "\r\n");
 773                }
 774
 775                fflush(sfp);
 776
 777                /*
 778                 * Retrieve HTTP response line and check for "200" status code.
 779                 */
 780 read_response:
 781                if (fgets(buf, sizeof(buf), sfp) == NULL)
 782                        bb_error_msg_and_die("no response from server");
 783
 784                str = buf;
 785                str = skip_non_whitespace(str);
 786                str = skip_whitespace(str);
 787                // FIXME: no error check
 788                // xatou wouldn't work: "200 OK"
 789                status = atoi(str);
 790                switch (status) {
 791                case 0:
 792                case 100:
 793                        while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
 794                                /* eat all remaining headers */;
 795                        goto read_response;
 796                case 200:
 797/*
 798Response 204 doesn't say "null file", it says "metadata
 799has changed but data didn't":
 800
 801"10.2.5 204 No Content
 802The server has fulfilled the request but does not need to return
 803an entity-body, and might want to return updated metainformation.
 804The response MAY include new or updated metainformation in the form
 805of entity-headers, which if present SHOULD be associated with
 806the requested variant.
 807
 808If the client is a user agent, it SHOULD NOT change its document
 809view from that which caused the request to be sent. This response
 810is primarily intended to allow input for actions to take place
 811without causing a change to the user agent's active document view,
 812although any new or updated metainformation SHOULD be applied
 813to the document currently in the user agent's active view.
 814
 815The 204 response MUST NOT include a message-body, and thus
 816is always terminated by the first empty line after the header fields."
 817
 818However, in the real world it was observed that some web servers
 819(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
 820*/
 821                case 204:
 822                        break;
 823                case 300:  /* redirection */
 824                case 301:
 825                case 302:
 826                case 303:
 827                        break;
 828                case 206:
 829                        if (G.beg_range)
 830                                break;
 831                        /* fall through */
 832                default:
 833                        bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
 834                }
 835
 836                /*
 837                 * Retrieve HTTP headers.
 838                 */
 839                while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
 840                        /* gethdr converted "FOO:" string to lowercase */
 841                        smalluint key;
 842                        /* strip trailing whitespace */
 843                        char *s = strchrnul(str, '\0') - 1;
 844                        while (s >= str && (*s == ' ' || *s == '\t')) {
 845                                *s = '\0';
 846                                s--;
 847                        }
 848                        key = index_in_strings(keywords, buf) + 1;
 849                        if (key == KEY_content_length) {
 850                                G.content_len = BB_STRTOOFF(str, NULL, 10);
 851                                if (G.content_len < 0 || errno) {
 852                                        bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
 853                                }
 854                                G.got_clen = 1;
 855                                continue;
 856                        }
 857                        if (key == KEY_transfer_encoding) {
 858                                if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
 859                                        bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
 860                                G.chunked = G.got_clen = 1;
 861                        }
 862                        if (key == KEY_location && status >= 300) {
 863                                if (--redir_limit == 0)
 864                                        bb_error_msg_and_die("too many redirections");
 865                                fclose(sfp);
 866                                G.got_clen = 0;
 867                                G.chunked = 0;
 868                                if (str[0] == '/')
 869                                        /* free(target.allocated); */
 870                                        target.path = /* target.allocated = */ xstrdup(str+1);
 871                                        /* lsa stays the same: it's on the same server */
 872                                else {
 873                                        parse_url(str, &target);
 874                                        if (!use_proxy) {
 875                                                server.host = target.host;
 876                                                /* strip_ipv6_scope_id(target.host); - no! */
 877                                                /* we assume remote never gives us IPv6 addr with scope id */
 878                                                server.port = target.port;
 879                                                free(lsa);
 880                                                goto resolve_lsa;
 881                                        } /* else: lsa stays the same: we use proxy */
 882                                }
 883                                goto establish_session;
 884                        }
 885                }
 886//              if (status >= 300)
 887//                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
 888
 889                /* For HTTP, data is pumped over the same connection */
 890                dfp = sfp;
 891
 892        } else {
 893                /*
 894                 *  FTP session
 895                 */
 896                sfp = prepare_ftp_session(&dfp, &target, lsa);
 897        }
 898
 899        if (opt & WGET_OPT_SPIDER) {
 900                if (ENABLE_FEATURE_CLEAN_UP)
 901                        fclose(sfp);
 902                return EXIT_SUCCESS;
 903        }
 904
 905        if (output_fd < 0) {
 906                int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
 907                /* compat with wget: -O FILE can overwrite */
 908                if (opt & WGET_OPT_OUTNAME)
 909                        o_flags = O_WRONLY | O_CREAT | O_TRUNC;
 910                output_fd = xopen(fname_out, o_flags);
 911        }
 912
 913        retrieve_file_data(dfp, output_fd);
 914        xclose(output_fd);
 915
 916        if (dfp != sfp) {
 917                /* It's ftp. Close it properly */
 918                fclose(dfp);
 919                if (ftpcmd(NULL, NULL, sfp, buf) != 226)
 920                        bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
 921                /* ftpcmd("QUIT", NULL, sfp, buf); - why bother? */
 922        }
 923
 924        return EXIT_SUCCESS;
 925}
 926