1
2
3
4
5
6
7
8
9#include "libbb.h"
10
/* Pieces of a URL, as filled in by parse_url(). */
struct host_info {
	/* Remainder of the URL after the host part: the text following the
	 * first '/', or starting at '?' / '#' when one of those comes first.
	 * Empty string when the URL has none of these. */
	const char *path;
	/* Text found before the last '@' of the host part.  parse_url()
	 * does not touch this field when there is no '@', so callers
	 * set it to NULL beforehand. */
	const char *user;
	/* Host part of the URL (after the scheme and the optional "...@") */
	char *host;
	/* Port looked up for the URL scheme (defaults: 80 http, 21 ftp) */
	int port;
	/* 1 for "ftp://" URLs, 0 for "http://" URLs */
	smallint is_ftp;
};
20
21
22
/* Applet-wide state, accessed through the G macro below. */
struct globals {
	off_t content_len;        /* bytes still expected (Content-Length, FTP SIZE or current chunk) */
	off_t beg_range;          /* offset at which a continued (-c) download resumes */
#if ENABLE_FEATURE_WGET_STATUSBAR
	off_t transferred;        /* bytes written to the output so far */
	const char *curfile;      /* file name passed to the progress bar */
	bb_progress_t pmt;        /* progress bar state */
#endif
	smallint chunked;         /* server announced "Transfer-Encoding: chunked" */
	smallint got_clen;        /* content_len holds a usable length */
} FIX_ALIASING;
/* The globals are overlaid on the shared bb_common_bufsiz1 buffer */
#define G (*(struct globals*)&bb_common_bufsiz1)
/* Compile-time check: the array size turns negative (a compile error)
 * if struct globals does not fit into COMMON_BUFSIZE bytes */
struct BUG_G_too_big {
	char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
};
/* Nothing to set up at run time */
#define INIT_G() do { } while (0)
39
40
41#if ENABLE_FEATURE_WGET_STATUSBAR
42
43static void progress_meter(int flag)
44{
45
46 int save_errno = errno;
47
48 if (flag == -1) {
49 bb_progress_init(&G.pmt);
50 }
51
52 bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred,
53 G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
54
55 if (flag == 0) {
56
57 alarm(0);
58 bb_putchar_stderr('\n');
59 G.transferred = 0;
60 } else {
61 if (flag == -1) {
62 signal_SA_RESTART_empty_mask(SIGALRM, progress_meter);
63 }
64 alarm(1);
65 }
66
67 errno = save_errno;
68}
69
70#else
71
72static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
73
74#endif
75
76
77
78
79
80
81
82
83
84
85
86
/*
 * Remove the "%scope" part from a bracketed IPv6 literal, in place:
 *   "[fe80::1%eth0]:80" -> "[fe80::1]:80"
 *
 * The string is left untouched unless all of these hold:
 *  - it starts with '[',
 *  - it contains a '%',
 *  - the first ']' comes after that '%',
 *  - the ']' is followed by ':' or by the end of the string.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* Only bracketed IPv6 literals can carry a scope id */
	if (host[0] != '[')
		return;

	scope = strchr(host, '%');
	if (!scope)
		return;

	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address: do not touch it */
		return;
	}
	/* The '%' must sit inside the brackets. With '%' after ']'
	 * (e.g. "[::1]:%") the copy below would have its destination
	 * ahead of its source and would never reach the terminator. */
	if (cp < scope)
		return;

	/* cp points to "]...", scope points to "%eth0]...".
	 * Destination is below source, so a forward copy is safe. */
	do {
		*scope++ = *cp;
	} while (*cp++ != '\0');
}
112
113
114
/*
 * fread() wrapper which restarts the read when it was cut short by a
 * signal (stream error with errno == EINTR).
 *
 * Reads up to nmemb bytes from stream into ptr and returns the number
 * of bytes actually stored.
 */
static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	char *dest = (char*)ptr;
	size_t total = 0;

	for (;;) {
		size_t got;

		/* Start each attempt with clean error indicators, so that
		 * ferror()/errno below describe this attempt only */
		clearerr(stream);
		errno = 0;
		got = fread(dest + total, 1, nmemb - total, stream);
		total += got;

		if (total == nmemb)
			break;          /* everything requested was read */
		if (!ferror(stream))
			break;          /* short read without error: EOF */
		if (errno != EINTR)
			break;          /* a real error: let the caller see it */
	}

	return total;
}
130
131
132
/*
 * fgets() wrapper which retries when the call failed only because it
 * was interrupted by a signal (stream error with errno == EINTR).
 *
 * Returns s on success, NULL on EOF or on any other error.
 */
static char *safe_fgets(char *s, int size, FILE *stream)
{
	for (;;) {
		char *line;

		/* Reset the indicators so they describe this attempt only */
		clearerr(stream);
		errno = 0;
		line = fgets(s, size, stream);

		if (line != NULL)
			return line;
		if (!ferror(stream) || errno != EINTR)
			return NULL;
		/* interrupted: try again */
	}
}
145
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Encode str into the caller's 512-byte buffer using the base64 table
 * and return buf.
 *
 * At most 512/4*3 - 10 (= 374) input bytes are encoded; longer input
 * is silently cut.  Presumably this keeps the encoded text inside the
 * 512-byte buffer - confirm against bb_uuencode().
 */
static char *base64enc_512(char buf[512], const char *str)
{
	enum { MAX_INPUT = 512/4*3 - 10 };
	unsigned len;

	len = strlen(str);
	if (len > MAX_INPUT) {
		len = MAX_INPUT;
	}
	bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);

	return buf;
}
#endif
157
/*
 * Cut s at its first byte below ' ' (control characters, including
 * CR, LF and TAB), in place.  Bytes are compared as unsigned, so
 * values >= 0x80 are kept.  Returns s.
 */
static char* sanitize_string(char *s)
{
	unsigned char *end;

	for (end = (unsigned char *) s; *end >= ' '; end++)
		continue;
	*end = '\0';

	return s;
}
166
167static FILE *open_socket(len_and_sockaddr *lsa)
168{
169 FILE *fp;
170
171
172
173 fp = fdopen(xconnect_stream(lsa), "r+");
174 if (fp == NULL)
175 bb_perror_msg_and_die("fdopen");
176
177 return fp;
178}
179
/*
 * Optionally send an FTP command, then read the server's reply.
 *
 * s1, s2: the command is "<s1><s2>\r\n".  A NULL s2 is treated as "".
 *         When s1 is NULL nothing is sent and only a reply is read.
 * fp:     control connection stream.
 * buf:    caller's buffer; up to 510 bytes (terminator included) are
 *         stored per reply line.  On return it holds the final reply
 *         line, "NNN text", with the trailing CR LF removed.
 *
 * Lines are read until one starts with a digit and has a space in the
 * fourth position.  Returns the number in the first three characters
 * of that line.  Dies if a line cannot be read.
 */
static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
{
	int result;

	if (s1) {
		if (!s2)
			s2 = "";
		fprintf(fp, "%s%s\r\n", s1, s2);
		fflush(fp);
	}

	for (;;) {
		char *buf_ptr;

		if (fgets(buf, 510, fp) == NULL) {
			bb_perror_msg_and_die("error getting response");
		}
		buf_ptr = strstr(buf, "\r\n");
		if (buf_ptr) {
			*buf_ptr = '\0';
		}

		/* The final line of a reply looks like "NNN text".
		 * - isdigit() needs an unsigned char value: the byte comes
		 *   from the server and may be >= 0x80;
		 * - buf[1] and buf[2] are checked first so that buf[3] is
		 *   never read beyond the end of a short line. */
		if (isdigit((unsigned char)buf[0])
		 && buf[1] != '\0'
		 && buf[2] != '\0'
		 && buf[3] == ' '
		) {
			break;
		}
	}

	/* Temporarily cut the line after the code to convert it */
	buf[3] = '\0';
	result = xatoi_u(buf);
	buf[3] = ' ';
	return result;
}
206
207static void parse_url(char *src_url, struct host_info *h)
208{
209 char *url, *p, *sp;
210
211 url = xstrdup(src_url);
212
213 if (strncmp(url, "http://", 7) == 0) {
214 h->port = bb_lookup_port("http", "tcp", 80);
215 h->host = url + 7;
216 h->is_ftp = 0;
217 } else if (strncmp(url, "ftp://", 6) == 0) {
218 h->port = bb_lookup_port("ftp", "tcp", 21);
219 h->host = url + 6;
220 h->is_ftp = 1;
221 } else
222 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
223
224
225
226
227
228
229
230
231
232
233
234
235
236 sp = strchr(h->host, '/');
237 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
238 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
239 if (!sp) {
240 h->path = "";
241 } else if (*sp == '/') {
242 *sp = '\0';
243 h->path = sp + 1;
244 } else {
245
246
247
248 memmove(h->host - 1, h->host, sp - h->host);
249 h->host--;
250 sp[-1] = '\0';
251 h->path = sp;
252 }
253
254
255
256
257 sp = strrchr(h->host, '@');
258 if (sp != NULL) {
259 h->user = h->host;
260 *sp = '\0';
261 h->host = sp + 1;
262 }
263
264 sp = h->host;
265}
266
/*
 * Read one HTTP header line from fp into buf (at most bufsiz bytes,
 * terminator included).
 *
 * Returns NULL at end of input or at the empty line which ends the
 * headers.  Otherwise buf holds the header name, lowercased and
 * NUL-terminated, and the return value points into buf at the header
 * value (leading whitespace skipped, line ending removed).
 *
 * If the line did not fit into buf, the value is truncated and the
 * rest of the line is read and discarded.
 *
 * Dies if the name is not directly followed by ':'.
 */
static char *gethdr(char *buf, size_t bufsiz, FILE *fp)
{
	char *s, *hdrval;
	int c;

	/* retrieve header line */
	if (fgets(buf, bufsiz, fp) == NULL)
		return NULL;

	/* see if we are at the end of the headers */
	for (s = buf; *s == '\r'; ++s)
		continue;
	if (*s == '\n')
		return NULL;

	/* convert the header name to lower case.
	 * The cast matters: the byte comes from the network, and passing
	 * a negative char value to isalnum() is undefined. */
	for (s = buf; isalnum((unsigned char)*s) || *s == '-' || *s == '.'; ++s) {
		/* lowercases "A-Z", leaves "0-9", "a-z", '-' and '.' as is */
		*s = (*s | 0x20);
	}

	/* verify we are at the end of the header name */
	if (*s != ':')
		bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));

	/* locate the start of the header value */
	*s++ = '\0';
	hdrval = skip_whitespace(s);

	/* locate the end of header */
	while (*s && *s != '\r' && *s != '\n')
		++s;

	/* end of header found */
	if (*s) {
		*s = '\0';
		return hdrval;
	}

	/* The buffer was too small for the whole line: drop the rest */
	while (c = getc(fp), c != EOF && c != '\n')
		continue;

	return hdrval;
}
314
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/*
 * Return a newly allocated percent-encoded copy of str.
 *
 * Letters, digits and the characters ! & ' ( ) * - . = _ ~ are copied
 * as is; every other byte becomes "%XX" with uppercase hex digits.
 * The result is sized for the worst case (three bytes per input byte).
 */
static char *URL_escape(const char *str)
{
	char *res = xmalloc(strlen(str) * 3 + 1);
	char *out = res;
	unsigned char c;

	for (; (c = *str) != '\0'; str++) {
		unsigned char lower = c | 0x20;
		int literal = (c >= '0' && c <= '9')
			|| (lower >= 'a' && lower <= 'z');

		if (!literal) {
			switch (c) {
			case '!':
			case '&':
			case '\'':
			case '(':
			case ')':
			case '*':
			case '-':
			case '.':
			case '=':
			case '_':
			case '~':
				literal = 1;
				break;
			default:
				break;
			}
		}

		if (literal) {
			*out++ = c;
		} else {
			*out++ = '%';
			*out++ = bb_hexdigits_upcase[c >> 4];
			*out++ = bb_hexdigits_upcase[c & 0xf];
		}
	}
	*out = '\0';

	return res;
}
#endif
352
353static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
354{
355 char buf[512];
356 FILE *sfp;
357 char *str;
358 int port;
359
360 if (!target->user)
361 target->user = xstrdup("anonymous:busybox@");
362
363 sfp = open_socket(lsa);
364 if (ftpcmd(NULL, NULL, sfp, buf) != 220)
365 bb_error_msg_and_die("%s", sanitize_string(buf+4));
366
367
368
369
370
371 str = strchr(target->user, ':');
372 if (str)
373 *str++ = '\0';
374 switch (ftpcmd("USER ", target->user, sfp, buf)) {
375 case 230:
376 break;
377 case 331:
378 if (ftpcmd("PASS ", str, sfp, buf) == 230)
379 break;
380
381 default:
382 bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
383 }
384
385 ftpcmd("TYPE I", NULL, sfp, buf);
386
387
388
389
390 if (ftpcmd("SIZE ", target->path, sfp, buf) == 213) {
391 G.content_len = BB_STRTOOFF(buf+4, NULL, 10);
392 if (G.content_len < 0 || errno) {
393 bb_error_msg_and_die("SIZE value is garbage");
394 }
395 G.got_clen = 1;
396 }
397
398
399
400
401 if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
402 pasv_error:
403 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
404 }
405
406
407
408 str = strrchr(buf, ')');
409 if (str) str[0] = '\0';
410 str = strrchr(buf, ',');
411 if (!str) goto pasv_error;
412 port = xatou_range(str+1, 0, 255);
413 *str = '\0';
414 str = strrchr(buf, ',');
415 if (!str) goto pasv_error;
416 port += xatou_range(str+1, 0, 255) * 256;
417 set_nport(lsa, htons(port));
418
419 *dfpp = open_socket(lsa);
420
421 if (G.beg_range) {
422 sprintf(buf, "REST %"OFF_FMT"u", G.beg_range);
423 if (ftpcmd(buf, NULL, sfp, buf) == 350)
424 G.content_len -= G.beg_range;
425 }
426
427 if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
428 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
429
430 return sfp;
431}
432
433
/*
 * Bits of the option mask returned by getopt32() in wget_main().
 * The first nine follow the order of the short option string
 * "csqO:P:Y:U:t:T:"; the rest belong to the long-only options.
 */
enum {
	WGET_OPT_CONTINUE = (1 << 0),             /* -c */
	WGET_OPT_SPIDER = (1 << 1),               /* -s */
	WGET_OPT_QUIET = (1 << 2),                /* -q */
	WGET_OPT_OUTNAME = (1 << 3),              /* -O FILE */
	WGET_OPT_PREFIX = (1 << 4),               /* -P DIR */
	WGET_OPT_PROXY = (1 << 5),                /* -Y on/off */
	WGET_OPT_USER_AGENT = (1 << 6),           /* -U AGENT */
	WGET_OPT_RETRIES = (1 << 7),              /* -t */
	WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 8), /* -T */
	WGET_OPT_PASSIVE = (1 << 9),              /* --passive-ftp */
	/* The two below are scaled by the feature switch; presumably it
	 * is 0 when long options are compiled out, which would make
	 * tests of these bits constant false - confirm. */
	WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,    /* --header */
	WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS, /* --post-data */
};
448
449static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
450{
451 char buf[512];
452
453 if (!(option_mask32 & WGET_OPT_QUIET))
454 progress_meter(-1);
455
456 if (G.chunked)
457 goto get_clen;
458
459
460 while (1) {
461 while (1) {
462 int n;
463 unsigned rdsz;
464
465 rdsz = sizeof(buf);
466 if (G.got_clen) {
467 if (G.content_len < (off_t)sizeof(buf)) {
468 if ((int)G.content_len <= 0)
469 break;
470 rdsz = (unsigned)G.content_len;
471 }
472 }
473 n = safe_fread(buf, rdsz, dfp);
474 if (n <= 0) {
475 if (ferror(dfp)) {
476
477 bb_error_msg_and_die(bb_msg_read_error);
478 }
479 break;
480 }
481 xwrite(output_fd, buf, n);
482#if ENABLE_FEATURE_WGET_STATUSBAR
483 G.transferred += n;
484#endif
485 if (G.got_clen)
486 G.content_len -= n;
487 }
488
489 if (!G.chunked)
490 break;
491
492 safe_fgets(buf, sizeof(buf), dfp);
493 get_clen:
494 safe_fgets(buf, sizeof(buf), dfp);
495 G.content_len = STRTOOFF(buf, NULL, 16);
496
497 if (G.content_len == 0)
498 break;
499 G.got_clen = 1;
500 }
501
502 if (!(option_mask32 & WGET_OPT_QUIET))
503 progress_meter(0);
504}
505
506int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
507int wget_main(int argc UNUSED_PARAM, char **argv)
508{
509 char buf[512];
510 struct host_info server, target;
511 len_and_sockaddr *lsa;
512 unsigned opt;
513 int redir_limit;
514 char *proxy = NULL;
515 char *dir_prefix = NULL;
516#if ENABLE_FEATURE_WGET_LONG_OPTIONS
517 char *post_data;
518 char *extra_headers = NULL;
519 llist_t *headers_llist = NULL;
520#endif
521 FILE *sfp;
522 FILE *dfp;
523 char *fname_out;
524 int output_fd = -1;
525 bool use_proxy;
526 const char *proxy_flag = "on";
527 const char *user_agent = "Wget";
528
529 static const char keywords[] ALIGN1 =
530 "content-length\0""transfer-encoding\0""chunked\0""location\0";
531 enum {
532 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
533 };
534#if ENABLE_FEATURE_WGET_LONG_OPTIONS
535 static const char wget_longopts[] ALIGN1 =
536
537 "continue\0" No_argument "c"
538 "spider\0" No_argument "s"
539 "quiet\0" No_argument "q"
540 "output-document\0" Required_argument "O"
541 "directory-prefix\0" Required_argument "P"
542 "proxy\0" Required_argument "Y"
543 "user-agent\0" Required_argument "U"
544
545
546
547
548 "passive-ftp\0" No_argument "\xff"
549 "header\0" Required_argument "\xfe"
550 "post-data\0" Required_argument "\xfd"
551
552 "no-check-certificate\0" No_argument "\xfc"
553 ;
554#endif
555
556 INIT_G();
557
558#if ENABLE_FEATURE_WGET_LONG_OPTIONS
559 applet_long_options = wget_longopts;
560#endif
561
562 opt_complementary = "-1" IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
563 opt = getopt32(argv, "csqO:P:Y:U:" "t:T:",
564 &fname_out, &dir_prefix,
565 &proxy_flag, &user_agent,
566 NULL,
567 NULL
568 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
569 IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
570 );
571#if ENABLE_FEATURE_WGET_LONG_OPTIONS
572 if (headers_llist) {
573 int size = 1;
574 char *cp;
575 llist_t *ll = headers_llist;
576 while (ll) {
577 size += strlen(ll->data) + 2;
578 ll = ll->link;
579 }
580 extra_headers = cp = xmalloc(size);
581 while (headers_llist) {
582 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
583 }
584 }
585#endif
586
587
588
589 target.user = NULL;
590 parse_url(argv[optind], &target);
591
592
593 use_proxy = (strcmp(proxy_flag, "off") != 0);
594 if (use_proxy) {
595 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
596 if (proxy && proxy[0]) {
597 server.user = NULL;
598 parse_url(proxy, &server);
599 } else {
600 use_proxy = 0;
601 }
602 }
603 if (!use_proxy) {
604 server.port = target.port;
605 if (ENABLE_FEATURE_IPV6) {
606 server.host = xstrdup(target.host);
607 } else {
608 server.host = target.host;
609 }
610 }
611
612 if (ENABLE_FEATURE_IPV6)
613 strip_ipv6_scope_id(target.host);
614
615
616 if (!(opt & WGET_OPT_OUTNAME)) {
617 fname_out = bb_get_last_path_component_nostrip(target.path);
618
619 if (fname_out[0] == '/' || !fname_out[0])
620 fname_out = (char*)"index.html";
621
622 if (dir_prefix)
623 fname_out = concat_path_file(dir_prefix, fname_out);
624 } else {
625 if (LONE_DASH(fname_out)) {
626
627 output_fd = 1;
628 opt &= ~WGET_OPT_CONTINUE;
629 }
630 }
631#if ENABLE_FEATURE_WGET_STATUSBAR
632 G.curfile = bb_get_last_path_component_nostrip(fname_out);
633#endif
634
635
636
637
638
639
640
641 if (opt & WGET_OPT_CONTINUE) {
642 output_fd = open(fname_out, O_WRONLY);
643 if (output_fd >= 0) {
644 G.beg_range = xlseek(output_fd, 0, SEEK_END);
645 }
646
647
648 }
649
650 redir_limit = 5;
651 resolve_lsa:
652 lsa = xhost2sockaddr(server.host, server.port);
653 if (!(opt & WGET_OPT_QUIET)) {
654 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
655 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
656 free(s);
657 }
658 establish_session:
659 if (use_proxy || !target.is_ftp) {
660
661
662
663 char *str;
664 int status;
665
666
667 sfp = open_socket(lsa);
668
669
670 if (use_proxy) {
671 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
672 target.is_ftp ? "f" : "ht", target.host,
673 target.path);
674 } else {
675 if (opt & WGET_OPT_POST_DATA)
676 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
677 else
678 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
679 }
680
681 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
682 target.host, user_agent);
683
684#if ENABLE_FEATURE_WGET_AUTHENTICATION
685 if (target.user) {
686 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
687 base64enc_512(buf, target.user));
688 }
689 if (use_proxy && server.user) {
690 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
691 base64enc_512(buf, server.user));
692 }
693#endif
694
695 if (G.beg_range)
696 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
697#if ENABLE_FEATURE_WGET_LONG_OPTIONS
698 if (extra_headers)
699 fputs(extra_headers, sfp);
700
701 if (opt & WGET_OPT_POST_DATA) {
702 char *estr = URL_escape(post_data);
703 fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
704 fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
705 (int) strlen(estr), estr);
706
707
708 free(estr);
709 } else
710#endif
711 {
712 fprintf(sfp, "\r\n");
713 }
714
715
716
717
718 read_response:
719 if (fgets(buf, sizeof(buf), sfp) == NULL)
720 bb_error_msg_and_die("no response from server");
721
722 str = buf;
723 str = skip_non_whitespace(str);
724 str = skip_whitespace(str);
725
726
727 status = atoi(str);
728 switch (status) {
729 case 0:
730 case 100:
731 while (gethdr(buf, sizeof(buf), sfp ) != NULL)
732 ;
733 goto read_response;
734 case 200:
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759 case 204:
760 break;
761 case 300:
762 case 301:
763 case 302:
764 case 303:
765 break;
766 case 206:
767 if (G.beg_range)
768 break;
769
770 default:
771 bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
772 }
773
774
775
776
777 while ((str = gethdr(buf, sizeof(buf), sfp )) != NULL) {
778
779 smalluint key;
780
781 char *s = strchrnul(str, '\0') - 1;
782 while (s >= str && (*s == ' ' || *s == '\t')) {
783 *s = '\0';
784 s--;
785 }
786 key = index_in_strings(keywords, buf) + 1;
787 if (key == KEY_content_length) {
788 G.content_len = BB_STRTOOFF(str, NULL, 10);
789 if (G.content_len < 0 || errno) {
790 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
791 }
792 G.got_clen = 1;
793 continue;
794 }
795 if (key == KEY_transfer_encoding) {
796 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
797 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
798 G.chunked = G.got_clen = 1;
799 }
800 if (key == KEY_location && status >= 300) {
801 if (--redir_limit == 0)
802 bb_error_msg_and_die("too many redirections");
803 fclose(sfp);
804 G.got_clen = 0;
805 G.chunked = 0;
806 if (str[0] == '/')
807
808 target.path = xstrdup(str+1);
809
810 else {
811 parse_url(str, &target);
812 if (!use_proxy) {
813 server.host = target.host;
814
815
816 server.port = target.port;
817 free(lsa);
818 goto resolve_lsa;
819 }
820 }
821 goto establish_session;
822 }
823 }
824
825
826
827
828 dfp = sfp;
829
830 } else {
831
832
833
834 sfp = prepare_ftp_session(&dfp, &target, lsa);
835 }
836
837 if (opt & WGET_OPT_SPIDER) {
838 if (ENABLE_FEATURE_CLEAN_UP)
839 fclose(sfp);
840 return EXIT_SUCCESS;
841 }
842
843 if (output_fd < 0) {
844 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
845
846 if (opt & WGET_OPT_OUTNAME)
847 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
848 output_fd = xopen(fname_out, o_flags);
849 }
850
851 retrieve_file_data(dfp, output_fd);
852 xclose(output_fd);
853
854 if (dfp != sfp) {
855
856 fclose(dfp);
857 if (ftpcmd(NULL, NULL, sfp, buf) != 226)
858 bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
859
860 }
861
862 return EXIT_SUCCESS;
863}
864