1
2
3
4
5
6
7
8
9#include "libbb.h"
10
/*
 * Components of one parsed URL, filled in by parse_url().
 * The string members point into a private copy of the URL made by parse_url().
 */
struct host_info {
	/* Everything after the host part. A leading '/' is dropped,
	 * but a leading '?' or '#' is kept. "" when the URL has no path. */
	const char *path;
	/* Text before the last '@' in the host part; parse_url() leaves this
	 * member untouched when the URL has no '@'. */
	const char *user;
	/* Host part of the URL; parse_url() does not split off a ":port" suffix. */
	char *host;
	/* Default port for the scheme, as returned by bb_lookup_port(). */
	int port;
	/* 1 for ftp:// URLs, 0 for http:// URLs. */
	smallint is_ftp;
};
20
21
22
/* Applet-wide state, accessed through the G macro defined below. */
struct globals {
	off_t content_len;        /* body bytes still expected (decremented as data is read) */
	off_t beg_range;          /* offset to resume from; 0 means start of file */
#if ENABLE_FEATURE_WGET_STATUSBAR
	off_t transferred;        /* bytes written so far in the current transfer */
	const char *curfile;      /* file name handed to the progress display */
	bb_progress_t pmt;        /* state owned by bb_progress_init/bb_progress_update */
#endif
	smallint chunked;         /* set when the server announced chunked transfer encoding */
	smallint got_clen;        /* set when content_len holds a usable value */
};
/* The globals are overlaid on the bb_common_bufsiz1 buffer. */
#define G (*(struct globals*)&bb_common_bufsiz1)
/* Compile-time check: the array size becomes -1 (an error) if struct globals
 * does not fit into COMMON_BUFSIZE bytes. */
struct BUG_G_too_big {
	char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
};
/* Nothing to initialize explicitly. */
#define INIT_G() do { } while (0)
39
40
41#if ENABLE_FEATURE_WGET_STATUSBAR
42
43static void progress_meter(int flag)
44{
45
46 int save_errno = errno;
47
48 if (flag == -1) {
49 bb_progress_init(&G.pmt);
50 }
51
52 bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred,
53 G.chunked ? 0 : G.content_len + G.beg_range);
54
55 if (flag == 0) {
56
57 alarm(0);
58 fputc('\n', stderr);
59 G.transferred = 0;
60 } else {
61 if (flag == -1) {
62 signal_SA_RESTART_empty_mask(SIGALRM, progress_meter);
63 }
64 alarm(1);
65 }
66
67 errno = save_errno;
68}
69
70#else
71
72static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
73
74#endif
75
76
77
78
79
80
81
82
83
84
85
86
/*
 * Remove an IPv6 zone identifier ("%eth0") from a bracketed literal, in place:
 *   "[fe80::1%eth0]:80" -> "[fe80::1]:80"
 *
 * The string is left untouched when it does not start with '[', contains
 * no '%', has no closing ']', has anything other than ':' or end-of-string
 * after the ']', or when the '%' is not inside the brackets.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	if (host[0] != '[')
		return; /* not a bracketed IPv6 literal */

	scope = strchr(host, '%');
	if (!scope)
		return; /* no zone identifier present */

	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed: neither "[addr]" nor "[addr]:port" */
		return;
	}

	/*
	 * The '%' must be inside the brackets. If it comes after the ']'
	 * (e.g. "[::1]:80%25") moving the tail would shift it to a higher
	 * address and write beyond the end of the buffer.
	 */
	if (scope > cp)
		return;

	/* scope points at "%zone]...", cp at "]...": slide the tail (with NUL) down */
	memmove(scope, cp, strlen(cp) + 1);
}
112
113
114
/*
 * Read up to nmemb bytes from stream into ptr, restarting the read whenever
 * it was cut short by an interrupted system call (EINTR).
 * Returns the number of bytes actually stored; the stream's error/EOF
 * indicators reflect the last fread() attempt.
 */
static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	char *const start = (char*)ptr;
	char *cursor = start;

	for (;;) {
		size_t got;

		/* Reset indicators so ferror()/errno below describe this attempt only */
		clearerr(stream);
		errno = 0;
		got = fread(cursor, 1, nmemb, stream);
		cursor += got;
		nmemb -= got;

		if (nmemb == 0)
			break;          /* request fully satisfied */
		if (!ferror(stream) || errno != EINTR)
			break;          /* EOF or a real error: stop retrying */
	}

	return cursor - start;
}
130
131
132
/*
 * fgets() wrapper that retries when the read fails because of an
 * interrupted system call (EINTR).
 * Returns s on success, NULL on EOF or on any other error.
 */
static char *safe_fgets(char *s, int size, FILE *stream)
{
	for (;;) {
		char *line;

		/* Reset indicators so ferror()/errno below describe this attempt only */
		clearerr(stream);
		errno = 0;
		line = fgets(s, size, stream);

		if (line != NULL)
			return line;
		if (!ferror(stream) || errno != EINTR)
			return NULL;    /* EOF or a real error */
	}
}
145
146#if ENABLE_FEATURE_WGET_AUTHENTICATION
147
148static char *base64enc_512(char buf[512], const char *str)
149{
150 unsigned len = strlen(str);
151 if (len > 512/4*3 - 10)
152 len = 512/4*3 - 10;
153 bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);
154 return buf;
155}
156#endif
157
/*
 * Cut s (in place) at the first byte below ' ' (control characters and the
 * terminating NUL), so it can be printed safely. Bytes >= 0x80 are kept.
 * Returns s.
 */
static char* sanitize_string(char *s)
{
	unsigned char *scan;

	for (scan = (unsigned char *) s; *scan >= ' '; scan++)
		continue;
	*scan = '\0';

	return s;
}
166
167static FILE *open_socket(len_and_sockaddr *lsa)
168{
169 FILE *fp;
170
171
172
173 fp = fdopen(xconnect_stream(lsa), "r+");
174 if (fp == NULL)
175 bb_perror_msg_and_die("fdopen");
176
177 return fp;
178}
179
/*
 * Optionally send the FTP command "<s1><s2>\r\n" on fp, then read reply lines
 * into buf until one starts with a digit and has a space in its fourth
 * column (the final line of a reply).
 *
 * s1  - command text, or NULL to only wait for a reply
 * s2  - command argument, may be NULL (treated as "")
 * buf - receives the final reply line with its CRLF removed
 *
 * Returns the numeric reply code. Exits if the reply cannot be read.
 */
static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
{
	int code;

	if (s1 != NULL) {
		fprintf(fp, "%s%s\r\n", s1, s2 ? s2 : "");
		fflush(fp);
	}

	for (;;) {
		char *crlf;

		if (!fgets(buf, 510, fp)) {
			bb_perror_msg_and_die("error getting response");
		}
		crlf = strstr(buf, "\r\n");
		if (crlf != NULL)
			*crlf = '\0';

		/* Continuation lines ("123-text" or free text) are skipped */
		if (isdigit(buf[0]) && buf[3] == ' ')
			break;
	}

	/* Temporarily terminate after the three-digit code to convert it */
	buf[3] = '\0';
	code = xatoi_u(buf);
	buf[3] = ' ';

	return code;
}
206
207static void parse_url(char *src_url, struct host_info *h)
208{
209 char *url, *p, *sp;
210
211 url = xstrdup(src_url);
212
213 if (strncmp(url, "http://", 7) == 0) {
214 h->port = bb_lookup_port("http", "tcp", 80);
215 h->host = url + 7;
216 h->is_ftp = 0;
217 } else if (strncmp(url, "ftp://", 6) == 0) {
218 h->port = bb_lookup_port("ftp", "tcp", 21);
219 h->host = url + 6;
220 h->is_ftp = 1;
221 } else
222 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
223
224
225
226
227
228
229
230
231
232
233
234
235
236 sp = strchr(h->host, '/');
237 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
238 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
239 if (!sp) {
240 h->path = "";
241 } else if (*sp == '/') {
242 *sp = '\0';
243 h->path = sp + 1;
244 } else {
245
246
247
248 memmove(h->host - 1, h->host, sp - h->host);
249 h->host--;
250 sp[-1] = '\0';
251 h->path = sp;
252 }
253
254
255
256
257 sp = strrchr(h->host, '@');
258 if (sp != NULL) {
259 h->user = h->host;
260 *sp = '\0';
261 h->host = sp + 1;
262 }
263
264 sp = h->host;
265}
266
/*
 * Read one "name: value" header line from fp into buf.
 *
 * On return buf holds the lower-cased, NUL-terminated header name and the
 * returned pointer addresses the value (leading whitespace skipped, line
 * ending removed) inside buf.
 *
 * Returns NULL at end of input or when the blank line ending the header
 * block is read. A line without a ':' after the name terminates the program.
 * If the line did not fit into buf, the remainder of it is discarded.
 */
static char *gethdr(char *buf, size_t bufsiz, FILE *fp )
{
	char *name_end, *value, *eol;
	int ch;

	if (!fgets(buf, bufsiz, fp))
		return NULL;

	/* A newline, optionally preceded by CRs, marks the end of the headers */
	if (buf[strspn(buf, "\r")] == '\n')
		return NULL;

	/* Lower-case the header name in place */
	name_end = buf;
	while (isalnum(*name_end) || *name_end == '-' || *name_end == '.') {
		*name_end = tolower(*name_end);
		name_end++;
	}

	if (*name_end != ':')
		bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));

	/* Split name from value at the colon */
	*name_end = '\0';
	value = skip_whitespace(name_end + 1);

	/* Locate the line ending */
	eol = name_end + 1;
	eol += strcspn(eol, "\r\n");
	if (*eol != '\0') {
		*eol = '\0';
		return value;
	}

	/* No line ending in buf: the line was truncated, swallow the rest of it */
	do {
		ch = getc(fp);
	} while (ch != EOF && ch != '\n');

	return value;
}
312
313#if ENABLE_FEATURE_WGET_LONG_OPTIONS
314static char *URL_escape(const char *str)
315{
316
317 char *dst;
318 char *res = dst = xmalloc(strlen(str) * 3 + 1);
319 unsigned char c;
320
321 while (1) {
322 c = *str++;
323 if (c == '\0'
324
325 || c == '!'
326 || c == '&'
327 || c == '\''
328 || c == '('
329 || c == ')'
330 || c == '*'
331 || c == '-'
332 || c == '.'
333 || c == '='
334 || c == '_'
335 || c == '~'
336 || (c >= '0' && c <= '9')
337 || ((c|0x20) >= 'a' && (c|0x20) <= 'z')
338 ) {
339 *dst++ = c;
340 if (c == '\0')
341 return res;
342 } else {
343 *dst++ = '%';
344 *dst++ = bb_hexdigits_upcase[c >> 4];
345 *dst++ = bb_hexdigits_upcase[c & 0xf];
346 }
347 }
348}
349#endif
350
351static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
352{
353 char buf[512];
354 FILE *sfp;
355 char *str;
356 int port;
357
358 if (!target->user)
359 target->user = xstrdup("anonymous:busybox@");
360
361 sfp = open_socket(lsa);
362 if (ftpcmd(NULL, NULL, sfp, buf) != 220)
363 bb_error_msg_and_die("%s", sanitize_string(buf+4));
364
365
366
367
368
369 str = strchr(target->user, ':');
370 if (str)
371 *str++ = '\0';
372 switch (ftpcmd("USER ", target->user, sfp, buf)) {
373 case 230:
374 break;
375 case 331:
376 if (ftpcmd("PASS ", str, sfp, buf) == 230)
377 break;
378
379 default:
380 bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
381 }
382
383 ftpcmd("TYPE I", NULL, sfp, buf);
384
385
386
387
388 if (ftpcmd("SIZE ", target->path, sfp, buf) == 213) {
389 G.content_len = BB_STRTOOFF(buf+4, NULL, 10);
390 if (G.content_len < 0 || errno) {
391 bb_error_msg_and_die("SIZE value is garbage");
392 }
393 G.got_clen = 1;
394 }
395
396
397
398
399 if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
400 pasv_error:
401 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
402 }
403
404
405
406 str = strrchr(buf, ')');
407 if (str) str[0] = '\0';
408 str = strrchr(buf, ',');
409 if (!str) goto pasv_error;
410 port = xatou_range(str+1, 0, 255);
411 *str = '\0';
412 str = strrchr(buf, ',');
413 if (!str) goto pasv_error;
414 port += xatou_range(str+1, 0, 255) * 256;
415 set_nport(lsa, htons(port));
416
417 *dfpp = open_socket(lsa);
418
419 if (G.beg_range) {
420 sprintf(buf, "REST %"OFF_FMT"u", G.beg_range);
421 if (ftpcmd(buf, NULL, sfp, buf) == 350)
422 G.content_len -= G.beg_range;
423 }
424
425 if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
426 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
427
428 return sfp;
429}
430
431
/*
 * Bit flags tested against the option mask produced by getopt32()
 * (the local `opt` in wget_main and the global option_mask32).
 */
enum {
	WGET_OPT_CONTINUE   = (1 << 0),
	WGET_OPT_SPIDER     = (1 << 1),
	WGET_OPT_QUIET      = (1 << 2),
	WGET_OPT_OUTNAME    = (1 << 3),
	WGET_OPT_PREFIX     = (1 << 4),
	WGET_OPT_PROXY      = (1 << 5),
	WGET_OPT_USER_AGENT = (1 << 6),
	WGET_OPT_RETRIES    = (1 << 7),
	WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 8),
	WGET_OPT_PASSIVE    = (1 << 9),
	/* The two flags below become 0 (never set) when
	 * ENABLE_FEATURE_WGET_LONG_OPTIONS is 0 */
	WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
	WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
};
446
447static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
448{
449 char buf[512];
450
451 if (!(option_mask32 & WGET_OPT_QUIET))
452 progress_meter(-1);
453
454 if (G.chunked)
455 goto get_clen;
456
457
458 while (1) {
459 while (1) {
460 int n;
461 unsigned rdsz;
462
463 rdsz = sizeof(buf);
464 if (G.got_clen) {
465 if (G.content_len < (off_t)sizeof(buf)) {
466 if ((int)G.content_len <= 0)
467 break;
468 rdsz = (unsigned)G.content_len;
469 }
470 }
471 n = safe_fread(buf, rdsz, dfp);
472 if (n <= 0) {
473 if (ferror(dfp)) {
474
475 bb_error_msg_and_die(bb_msg_read_error);
476 }
477 break;
478 }
479 xwrite(output_fd, buf, n);
480#if ENABLE_FEATURE_WGET_STATUSBAR
481 G.transferred += n;
482#endif
483 if (G.got_clen)
484 G.content_len -= n;
485 }
486
487 if (!G.chunked)
488 break;
489
490 safe_fgets(buf, sizeof(buf), dfp);
491 get_clen:
492 safe_fgets(buf, sizeof(buf), dfp);
493 G.content_len = STRTOOFF(buf, NULL, 16);
494
495 if (G.content_len == 0)
496 break;
497 G.got_clen = 1;
498 }
499
500 if (!(option_mask32 & WGET_OPT_QUIET))
501 progress_meter(0);
502}
503
504int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
505int wget_main(int argc UNUSED_PARAM, char **argv)
506{
507 char buf[512];
508 struct host_info server, target;
509 len_and_sockaddr *lsa;
510 unsigned opt;
511 int redir_limit;
512 char *proxy = NULL;
513 char *dir_prefix = NULL;
514#if ENABLE_FEATURE_WGET_LONG_OPTIONS
515 char *post_data;
516 char *extra_headers = NULL;
517 llist_t *headers_llist = NULL;
518#endif
519 FILE *sfp;
520 FILE *dfp;
521 char *fname_out;
522 int output_fd = -1;
523 bool use_proxy;
524 const char *proxy_flag = "on";
525 const char *user_agent = "Wget";
526
527 static const char keywords[] ALIGN1 =
528 "content-length\0""transfer-encoding\0""chunked\0""location\0";
529 enum {
530 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
531 };
532#if ENABLE_FEATURE_WGET_LONG_OPTIONS
533 static const char wget_longopts[] ALIGN1 =
534
535 "continue\0" No_argument "c"
536 "spider\0" No_argument "s"
537 "quiet\0" No_argument "q"
538 "output-document\0" Required_argument "O"
539 "directory-prefix\0" Required_argument "P"
540 "proxy\0" Required_argument "Y"
541 "user-agent\0" Required_argument "U"
542
543
544
545
546 "passive-ftp\0" No_argument "\xff"
547 "header\0" Required_argument "\xfe"
548 "post-data\0" Required_argument "\xfd"
549 ;
550#endif
551
552 INIT_G();
553
554#if ENABLE_FEATURE_WGET_LONG_OPTIONS
555 applet_long_options = wget_longopts;
556#endif
557
558 opt_complementary = "-1" IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
559 opt = getopt32(argv, "csqO:P:Y:U:" "t:T:",
560 &fname_out, &dir_prefix,
561 &proxy_flag, &user_agent,
562 NULL,
563 NULL
564 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
565 IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
566 );
567#if ENABLE_FEATURE_WGET_LONG_OPTIONS
568 if (headers_llist) {
569 int size = 1;
570 char *cp;
571 llist_t *ll = headers_llist;
572 while (ll) {
573 size += strlen(ll->data) + 2;
574 ll = ll->link;
575 }
576 extra_headers = cp = xmalloc(size);
577 while (headers_llist) {
578 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
579 }
580 }
581#endif
582
583
584
585 target.user = NULL;
586 parse_url(argv[optind], &target);
587
588
589 use_proxy = (strcmp(proxy_flag, "off") != 0);
590 if (use_proxy) {
591 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
592 if (proxy && proxy[0]) {
593 parse_url(proxy, &server);
594 } else {
595 use_proxy = 0;
596 }
597 }
598 if (!use_proxy) {
599 server.port = target.port;
600 if (ENABLE_FEATURE_IPV6) {
601 server.host = xstrdup(target.host);
602 } else {
603 server.host = target.host;
604 }
605 }
606
607 if (ENABLE_FEATURE_IPV6)
608 strip_ipv6_scope_id(target.host);
609
610
611 if (!(opt & WGET_OPT_OUTNAME)) {
612 fname_out = bb_get_last_path_component_nostrip(target.path);
613
614 if (fname_out[0] == '/' || !fname_out[0])
615 fname_out = (char*)"index.html";
616
617 if (dir_prefix)
618 fname_out = concat_path_file(dir_prefix, fname_out);
619 } else {
620 if (LONE_DASH(fname_out)) {
621
622 output_fd = 1;
623 opt &= ~WGET_OPT_CONTINUE;
624 }
625 }
626#if ENABLE_FEATURE_WGET_STATUSBAR
627 G.curfile = bb_get_last_path_component_nostrip(fname_out);
628#endif
629
630
631
632
633
634
635
636 if (opt & WGET_OPT_CONTINUE) {
637 output_fd = open(fname_out, O_WRONLY);
638 if (output_fd >= 0) {
639 G.beg_range = xlseek(output_fd, 0, SEEK_END);
640 }
641
642
643 }
644
645 redir_limit = 5;
646 resolve_lsa:
647 lsa = xhost2sockaddr(server.host, server.port);
648 if (!(opt & WGET_OPT_QUIET)) {
649 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
650 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
651 free(s);
652 }
653 establish_session:
654 if (use_proxy || !target.is_ftp) {
655
656
657
658 char *str;
659 int status;
660
661
662 sfp = open_socket(lsa);
663
664
665 if (use_proxy) {
666 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
667 target.is_ftp ? "f" : "ht", target.host,
668 target.path);
669 } else {
670 if (opt & WGET_OPT_POST_DATA)
671 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
672 else
673 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
674 }
675
676 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
677 target.host, user_agent);
678
679#if ENABLE_FEATURE_WGET_AUTHENTICATION
680 if (target.user) {
681 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
682 base64enc_512(buf, target.user));
683 }
684 if (use_proxy && server.user) {
685 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
686 base64enc_512(buf, server.user));
687 }
688#endif
689
690 if (G.beg_range)
691 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
692#if ENABLE_FEATURE_WGET_LONG_OPTIONS
693 if (extra_headers)
694 fputs(extra_headers, sfp);
695
696 if (opt & WGET_OPT_POST_DATA) {
697 char *estr = URL_escape(post_data);
698 fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
699 fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
700 (int) strlen(estr), estr);
701
702
703 free(estr);
704 } else
705#endif
706 {
707 fprintf(sfp, "\r\n");
708 }
709
710
711
712
713 read_response:
714 if (fgets(buf, sizeof(buf), sfp) == NULL)
715 bb_error_msg_and_die("no response from server");
716
717 str = buf;
718 str = skip_non_whitespace(str);
719 str = skip_whitespace(str);
720
721
722 status = atoi(str);
723 switch (status) {
724 case 0:
725 case 100:
726 while (gethdr(buf, sizeof(buf), sfp ) != NULL)
727 ;
728 goto read_response;
729 case 200:
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754 case 204:
755 break;
756 case 300:
757 case 301:
758 case 302:
759 case 303:
760 break;
761 case 206:
762 if (G.beg_range)
763 break;
764
765 default:
766 bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
767 }
768
769
770
771
772 while ((str = gethdr(buf, sizeof(buf), sfp )) != NULL) {
773
774 smalluint key;
775
776 char *s = strchrnul(str, '\0') - 1;
777 while (s >= str && (*s == ' ' || *s == '\t')) {
778 *s = '\0';
779 s--;
780 }
781 key = index_in_strings(keywords, buf) + 1;
782 if (key == KEY_content_length) {
783 G.content_len = BB_STRTOOFF(str, NULL, 10);
784 if (G.content_len < 0 || errno) {
785 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
786 }
787 G.got_clen = 1;
788 continue;
789 }
790 if (key == KEY_transfer_encoding) {
791 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
792 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
793 G.chunked = G.got_clen = 1;
794 }
795 if (key == KEY_location && status >= 300) {
796 if (--redir_limit == 0)
797 bb_error_msg_and_die("too many redirections");
798 fclose(sfp);
799 G.got_clen = 0;
800 G.chunked = 0;
801 if (str[0] == '/')
802
803 target.path = xstrdup(str+1);
804
805 else {
806 parse_url(str, &target);
807 if (!use_proxy) {
808 server.host = target.host;
809
810
811 server.port = target.port;
812 free(lsa);
813 goto resolve_lsa;
814 }
815 }
816 goto establish_session;
817 }
818 }
819
820
821
822
823 dfp = sfp;
824
825 } else {
826
827
828
829 sfp = prepare_ftp_session(&dfp, &target, lsa);
830 }
831
832 if (opt & WGET_OPT_SPIDER) {
833 if (ENABLE_FEATURE_CLEAN_UP)
834 fclose(sfp);
835 return EXIT_SUCCESS;
836 }
837
838 if (output_fd < 0) {
839 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
840
841 if (opt & WGET_OPT_OUTNAME)
842 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
843 output_fd = xopen(fname_out, o_flags);
844 }
845
846 retrieve_file_data(dfp, output_fd);
847 xclose(output_fd);
848
849 if (dfp != sfp) {
850
851 fclose(dfp);
852 if (ftpcmd(NULL, NULL, sfp, buf) != 226)
853 bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
854
855 }
856
857 return EXIT_SUCCESS;
858}
859