busybox/coreutils/cut.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * cut.c - minimalist version of cut
   4 *
   5 * Copyright (C) 1999,2000,2001 by Lineo, inc.
   6 * Written by Mark Whitley <markw@codepoet.org>
   7 * debloated by Bernhard Reutner-Fischer
   8 *
   9 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
  10 */
  11
  12#include "libbb.h"
  13
  14/* This is a NOEXEC applet. Be very careful! */
  15
  16
  17/* option vars */
  18static const char optstring[] ALIGN1 = "b:c:f:d:sn";
  19#define CUT_OPT_BYTE_FLGS     (1 << 0)
  20#define CUT_OPT_CHAR_FLGS     (1 << 1)
  21#define CUT_OPT_FIELDS_FLGS   (1 << 2)
  22#define CUT_OPT_DELIM_FLGS    (1 << 3)
  23#define CUT_OPT_SUPPRESS_FLGS (1 << 4)
  24
  25struct cut_list {
  26        int startpos;
  27        int endpos;
  28};
  29
  30enum {
  31        BOL = 0,
  32        EOL = INT_MAX,
  33        NON_RANGE = -1
  34};
  35
  36static int cmpfunc(const void *a, const void *b)
  37{
  38        return (((struct cut_list *) a)->startpos -
  39                        ((struct cut_list *) b)->startpos);
  40
  41}
  42
  43static void cut_file(FILE *file, char delim, const struct cut_list *cut_lists, unsigned nlists)
  44{
  45        char *line;
  46        unsigned linenum = 0;   /* keep these zero-based to be consistent */
  47
  48        /* go through every line in the file */
  49        while ((line = xmalloc_fgetline(file)) != NULL) {
  50
  51                /* set up a list so we can keep track of what's been printed */
  52                int linelen = strlen(line);
  53                char *printed = xzalloc(linelen + 1);
  54                char *orig_line = line;
  55                unsigned cl_pos = 0;
  56                int spos;
  57
  58                /* cut based on chars/bytes XXX: only works when sizeof(char) == byte */
  59                if (option_mask32 & (CUT_OPT_CHAR_FLGS | CUT_OPT_BYTE_FLGS)) {
  60                        /* print the chars specified in each cut list */
  61                        for (; cl_pos < nlists; cl_pos++) {
  62                                spos = cut_lists[cl_pos].startpos;
  63                                while (spos < linelen) {
  64                                        if (!printed[spos]) {
  65                                                printed[spos] = 'X';
  66                                                putchar(line[spos]);
  67                                        }
  68                                        spos++;
  69                                        if (spos > cut_lists[cl_pos].endpos
  70                                        /* NON_RANGE is -1, so if below is true,
  71                                         * the above was true too (spos is >= 0) */
  72                                        /* || cut_lists[cl_pos].endpos == NON_RANGE */
  73                                        ) {
  74                                                break;
  75                                        }
  76                                }
  77                        }
  78                } else if (delim == '\n') {     /* cut by lines */
  79                        spos = cut_lists[cl_pos].startpos;
  80
  81                        /* get out if we have no more lists to process or if the lines
  82                         * are lower than what we're interested in */
  83                        if (((int)linenum < spos) || (cl_pos >= nlists))
  84                                goto next_line;
  85
  86                        /* if the line we're looking for is lower than the one we were
  87                         * passed, it means we displayed it already, so move on */
  88                        while (spos < (int)linenum) {
  89                                spos++;
  90                                /* go to the next list if we're at the end of this one */
  91                                if (spos > cut_lists[cl_pos].endpos
  92                                 || cut_lists[cl_pos].endpos == NON_RANGE
  93                                ) {
  94                                        cl_pos++;
  95                                        /* get out if there's no more lists to process */
  96                                        if (cl_pos >= nlists)
  97                                                goto next_line;
  98                                        spos = cut_lists[cl_pos].startpos;
  99                                        /* get out if the current line is lower than the one
 100                                         * we just became interested in */
 101                                        if ((int)linenum < spos)
 102                                                goto next_line;
 103                                }
 104                        }
 105
 106                        /* If we made it here, it means we've found the line we're
 107                         * looking for, so print it */
 108                        puts(line);
 109                        goto next_line;
 110                } else {                /* cut by fields */
 111                        int ndelim = -1;        /* zero-based / one-based problem */
 112                        int nfields_printed = 0;
 113                        char *field = NULL;
 114                        const char delimiter[2] = { delim, 0 };
 115
 116                        /* does this line contain any delimiters? */
 117                        if (strchr(line, delim) == NULL) {
 118                                if (!(option_mask32 & CUT_OPT_SUPPRESS_FLGS))
 119                                        puts(line);
 120                                goto next_line;
 121                        }
 122
 123                        /* process each list on this line, for as long as we've got
 124                         * a line to process */
 125                        for (; cl_pos < nlists && line; cl_pos++) {
 126                                spos = cut_lists[cl_pos].startpos;
 127                                do {
 128                                        /* find the field we're looking for */
 129                                        while (line && ndelim < spos) {
 130                                                field = strsep(&line, delimiter);
 131                                                ndelim++;
 132                                        }
 133
 134                                        /* we found it, and it hasn't been printed yet */
 135                                        if (field && ndelim == spos && !printed[ndelim]) {
 136                                                /* if this isn't our first time through, we need to
 137                                                 * print the delimiter after the last field that was
 138                                                 * printed */
 139                                                if (nfields_printed > 0)
 140                                                        putchar(delim);
 141                                                fputs(field, stdout);
 142                                                printed[ndelim] = 'X';
 143                                                nfields_printed++;      /* shouldn't overflow.. */
 144                                        }
 145
 146                                        spos++;
 147
 148                                        /* keep going as long as we have a line to work with,
 149                                         * this is a list, and we're not at the end of that
 150                                         * list */
 151                                } while (spos <= cut_lists[cl_pos].endpos && line
 152                                                && cut_lists[cl_pos].endpos != NON_RANGE);
 153                        }
 154                }
 155                /* if we printed anything at all, we need to finish it with a
 156                 * newline cuz we were handed a chomped line */
 157                putchar('\n');
 158 next_line:
 159                linenum++;
 160                free(printed);
 161                free(orig_line);
 162        }
 163}
 164
 165int cut_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
 166int cut_main(int argc UNUSED_PARAM, char **argv)
 167{
 168        /* growable array holding a series of lists */
 169        struct cut_list *cut_lists = NULL;
 170        unsigned nlists = 0;    /* number of elements in above list */
 171        char delim = '\t';      /* delimiter, default is tab */
 172        char *sopt, *ltok;
 173        unsigned opt;
 174
 175        opt_complementary = "b--bcf:c--bcf:f--bcf";
 176        opt = getopt32(argv, optstring, &sopt, &sopt, &sopt, &ltok);
 177//      argc -= optind;
 178        argv += optind;
 179        if (!(opt & (CUT_OPT_BYTE_FLGS | CUT_OPT_CHAR_FLGS | CUT_OPT_FIELDS_FLGS)))
 180                bb_error_msg_and_die("expected a list of bytes, characters, or fields");
 181
 182        if (opt & CUT_OPT_DELIM_FLGS) {
 183                if (ltok[0] && ltok[1]) { /* more than 1 char? */
 184                        bb_error_msg_and_die("the delimiter must be a single character");
 185                }
 186                delim = ltok[0];
 187        }
 188
 189        /*  non-field (char or byte) cutting has some special handling */
 190        if (!(opt & CUT_OPT_FIELDS_FLGS)) {
 191                static const char _op_on_field[] ALIGN1 = " only when operating on fields";
 192
 193                if (opt & CUT_OPT_SUPPRESS_FLGS) {
 194                        bb_error_msg_and_die
 195                                ("suppressing non-delimited lines makes sense%s",
 196                                 _op_on_field);
 197                }
 198                if (delim != '\t') {
 199                        bb_error_msg_and_die
 200                                ("a delimiter may be specified%s", _op_on_field);
 201                }
 202        }
 203
 204        /*
 205         * parse list and put values into startpos and endpos.
 206         * valid list formats: N, N-, N-M, -M
 207         * more than one list can be separated by commas
 208         */
 209        {
 210                char *ntok;
 211                int s = 0, e = 0;
 212
 213                /* take apart the lists, one by one (they are separated with commas) */
 214                while ((ltok = strsep(&sopt, ",")) != NULL) {
 215
 216                        /* it's actually legal to pass an empty list */
 217                        if (!ltok[0])
 218                                continue;
 219
 220                        /* get the start pos */
 221                        ntok = strsep(&ltok, "-");
 222                        if (!ntok[0]) {
 223                                s = BOL;
 224                        } else {
 225                                s = xatoi_u(ntok);
 226                                /* account for the fact that arrays are zero based, while
 227                                 * the user expects the first char on the line to be char #1 */
 228                                if (s != 0)
 229                                        s--;
 230                        }
 231
 232                        /* get the end pos */
 233                        if (ltok == NULL) {
 234                                e = NON_RANGE;
 235                        } else if (!ltok[0]) {
 236                                e = EOL;
 237                        } else {
 238                                e = xatoi_u(ltok);
 239                                /* if the user specified and end position of 0,
 240                                 * that means "til the end of the line" */
 241                                if (e == 0)
 242                                        e = EOL;
 243                                e--;    /* again, arrays are zero based, lines are 1 based */
 244                                if (e == s)
 245                                        e = NON_RANGE;
 246                        }
 247
 248                        /* add the new list */
 249                        cut_lists = xrealloc_vector(cut_lists, 4, nlists);
 250                        /* NB: startpos is always >= 0,
 251                         * while endpos may be = NON_RANGE (-1) */
 252                        cut_lists[nlists].startpos = s;
 253                        cut_lists[nlists].endpos = e;
 254                        nlists++;
 255                }
 256
 257                /* make sure we got some cut positions out of all that */
 258                if (nlists == 0)
 259                        bb_error_msg_and_die("missing list of positions");
 260
 261                /* now that the lists are parsed, we need to sort them to make life
 262                 * easier on us when it comes time to print the chars / fields / lines
 263                 */
 264                qsort(cut_lists, nlists, sizeof(struct cut_list), cmpfunc);
 265        }
 266
 267        {
 268                int retval = EXIT_SUCCESS;
 269
 270                if (!*argv)
 271                        *--argv = (char *)"-";
 272
 273                do {
 274                        FILE *file = fopen_or_warn_stdin(*argv);
 275                        if (!file) {
 276                                retval = EXIT_FAILURE;
 277                                continue;
 278                        }
 279                        cut_file(file, delim, cut_lists, nlists);
 280                        fclose_if_not_stdin(file);
 281                } while (*++argv);
 282
 283                if (ENABLE_FEATURE_CLEAN_UP)
 284                        free(cut_lists);
 285                fflush_stdout_and_exit(retval);
 286        }
 287}
 288