busybox/coreutils/wc.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * wc implementation for busybox
   4 *
   5 * Copyright (C) 2003  Manuel Novoa III  <mjn3@codepoet.org>
   6 *
   7 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
   8 */
   9
  10/* BB_AUDIT SUSv3 _NOT_ compliant -- option -m is not currently supported. */
  11/* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */
  12
  13/* Mar 16, 2003      Manuel Novoa III   (mjn3@codepoet.org)
  14 *
  15 * Rewritten to fix a number of problems and do some size optimizations.
  16 * Problems in the previous busybox implementation (besides bloat) included:
  17 *  1) broken 'wc -c' optimization (read note below)
  18 *  2) broken handling of '-' args
  19 *  3) no checking of ferror on EOF returns
  20 *  4) isprint() wasn't considered when word counting.
  21 *
  22 * TODO:
  23 *
  24 * When locale support is enabled, count multibyte chars in the '-m' case.
  25 *
  26 * NOTES:
  27 *
  28 * The previous busybox wc attempted an optimization using stat for the
  29 * case of counting chars only.  I omitted that because it was broken.
  30 * It didn't take into account the possibility of input coming from a
  31 * pipe, or input from a file with file pointer not at the beginning.
  32 *
  33 * To implement such a speed optimization correctly, not only do you
  34 * need the size, but also the file position.  Note also that the
  35 * file position may be past the end of file.  Consider the example
  36 * (adapted from example in gnu wc.c)
  37 *
  38 *      echo hello > /tmp/testfile &&
  39 *      (dd ibs=1k skip=1 count=0 &> /dev/null; wc -c) < /tmp/testfile
  40 *
  41 * for which 'wc -c' should output '0'.
  42 */
  43
  44#include "libbb.h"
  45
  46#if ENABLE_LOCALE_SUPPORT
  47#define isspace_given_isprint(c) isspace(c)
  48#else
  49#undef isspace
  50#undef isprint
  51#define isspace(c) ((((c) == ' ') || (((unsigned int)((c) - 9)) <= (13 - 9))))
  52#define isprint(c) (((unsigned int)((c) - 0x20)) <= (0x7e - 0x20))
  53#define isspace_given_isprint(c) ((c) == ' ')
  54#endif
  55
  56#if ENABLE_FEATURE_WC_LARGE
  57#define COUNT_T unsigned long long
  58#define COUNT_FMT "llu"
  59#else
  60#define COUNT_T unsigned
  61#define COUNT_FMT "u"
  62#endif
  63
  64enum {
  65        WC_LINES        = 0,
  66        WC_WORDS        = 1,
  67        WC_CHARS        = 2,
  68        WC_LENGTH       = 3
  69};
  70
  71int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
  72int wc_main(int argc UNUSED_PARAM, char **argv)
  73{
  74        FILE *fp;
  75        const char *s, *arg;
  76        const char *start_fmt = " %9"COUNT_FMT + 1;
  77        const char *fname_fmt = " %s\n";
  78        COUNT_T *pcounts;
  79        COUNT_T counts[4];
  80        COUNT_T totals[4];
  81        unsigned linepos;
  82        unsigned u;
  83        int num_files = 0;
  84        int c;
  85        smallint status = EXIT_SUCCESS;
  86        smallint in_word;
  87        unsigned print_type;
  88
  89        print_type = getopt32(argv, "lwcL");
  90
  91        if (print_type == 0) {
  92                print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS);
  93        }
  94
  95        argv += optind;
  96        if (!argv[0]) {
  97                *--argv = (char *) bb_msg_standard_input;
  98                fname_fmt = "\n";
  99                if (!((print_type-1) & print_type)) /* exactly one option? */
 100                        start_fmt = "%"COUNT_FMT;
 101        }
 102
 103        memset(totals, 0, sizeof(totals));
 104
 105        pcounts = counts;
 106
 107        while ((arg = *argv++) != 0) {
 108                ++num_files;
 109                fp = fopen_or_warn_stdin(arg);
 110                if (!fp) {
 111                        status = EXIT_FAILURE;
 112                        continue;
 113                }
 114
 115                memset(counts, 0, sizeof(counts));
 116                linepos = 0;
 117                in_word = 0;
 118
 119                do {
 120                        /* Our -w doesn't match GNU wc exactly... oh well */
 121
 122                        ++counts[WC_CHARS];
 123                        c = getc(fp);
 124                        if (isprint(c)) {
 125                                ++linepos;
 126                                if (!isspace_given_isprint(c)) {
 127                                        in_word = 1;
 128                                        continue;
 129                                }
 130                        } else if (((unsigned int)(c - 9)) <= 4) {
 131                                /* \t  9
 132                                 * \n 10
 133                                 * \v 11
 134                                 * \f 12
 135                                 * \r 13
 136                                 */
 137                                if (c == '\t') {
 138                                        linepos = (linepos | 7) + 1;
 139                                } else {                        /* '\n', '\r', '\f', or '\v' */
 140                                DO_EOF:
 141                                        if (linepos > counts[WC_LENGTH]) {
 142                                                counts[WC_LENGTH] = linepos;
 143                                        }
 144                                        if (c == '\n') {
 145                                                ++counts[WC_LINES];
 146                                        }
 147                                        if (c != '\v') {
 148                                                linepos = 0;
 149                                        }
 150                                }
 151                        } else if (c == EOF) {
 152                                if (ferror(fp)) {
 153                                        bb_simple_perror_msg(arg);
 154                                        status = EXIT_FAILURE;
 155                                }
 156                                --counts[WC_CHARS];
 157                                goto DO_EOF;            /* Treat an EOF as '\r'. */
 158                        } else {
 159                                continue;
 160                        }
 161
 162                        counts[WC_WORDS] += in_word;
 163                        in_word = 0;
 164                        if (c == EOF) {
 165                                break;
 166                        }
 167                } while (1);
 168
 169                if (totals[WC_LENGTH] < counts[WC_LENGTH]) {
 170                        totals[WC_LENGTH] = counts[WC_LENGTH];
 171                }
 172                totals[WC_LENGTH] -= counts[WC_LENGTH];
 173
 174                fclose_if_not_stdin(fp);
 175
 176        OUTPUT:
 177                /* coreutils wc tries hard to print pretty columns
 178                 * (saves results for all files, find max col len etc...)
 179                 * we won't try that hard, it will bloat us too much */
 180                s = start_fmt;
 181                u = 0;
 182                do {
 183                        if (print_type & (1 << u)) {
 184                                printf(s, pcounts[u]);
 185                                s = " %9"COUNT_FMT; /* Ok... restore the leading space. */
 186                        }
 187                        totals[u] += pcounts[u];
 188                } while (++u < 4);
 189                printf(fname_fmt, arg);
 190        }
 191
 192        /* If more than one file was processed, we want the totals.  To save some
 193         * space, we set the pcounts ptr to the totals array.  This has the side
 194         * effect of trashing the totals array after outputting it, but that's
 195         * irrelavent since we no longer need it. */
 196        if (num_files > 1) {
 197                num_files = 0;                          /* Make sure we don't get here again. */
 198                arg = "total";
 199                pcounts = totals;
 200                --argv;
 201                goto OUTPUT;
 202        }
 203
 204        fflush_stdout_and_exit(status);
 205}
 206