toybox/toys/posix/sort.c
<<
>>
Prefs
   1/* sort.c - put input lines into order
   2 *
   3 * Copyright 2004, 2008 Rob Landley <rob@landley.net>
   4 *
   5 * See http://opengroup.org/onlinepubs/007904975/utilities/sort.html
   6 *
   7 * Deviations from POSIX: Lots.
   8 * We invented -x
   9
  10USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")"S:T:m" "o:k*t:" "xVbMcszdfirun", TOYFLAG_USR|TOYFLAG_BIN))
  11
  12config SORT
  13  bool "sort"
  14  default y
  15  help
  16    usage: sort [-runbcdfiMsz] [FILE...] [-k#[,#[x]] [-t X]] [-o FILE]
  17
  18    Sort all lines of text from input files (or stdin) to stdout.
  19
  20    -r  Reverse
  21    -u  Unique lines only
  22    -n  Numeric order (instead of alphabetical)
  23    -b  Ignore leading blanks (or trailing blanks in second part of key)
  24    -c  Check whether input is sorted
  25    -d  Dictionary order (use alphanumeric and whitespace chars only)
  26    -f  Force uppercase (case insensitive sort)
  27    -i  Ignore nonprinting characters
  28    -M  Month sort (jan, feb, etc)
  29    -x  Hexadecimal numerical sort
  30    -s  Skip fallback sort (only sort with keys)
  31    -z  Zero (null) terminated lines
  32    -k  Sort by "key" (see below)
  33    -t  Use a key separator other than whitespace
  34    -o  Output to FILE instead of stdout
  35    -V  Version numbers (name-1.234-rc6.5b.tgz)
  36
  37    Sorting by key looks at a subset of the words on each line.  -k2
  38    uses the second word to the end of the line, -k2,2 looks at only
  39    the second word, -k2,4 looks from the start of the second to the end
  40    of the fourth word.  Specifying multiple keys uses the later keys as
  41    tie breakers, in order.  A type specifier appended to a sort key
  42    (such as -k2,2n) applies only to sorting that key.
  43
  44config SORT_FLOAT
  45  bool
  46  default y
  47  depends on TOYBOX_FLOAT
  48  help
  49    usage: sort [-g]
  50
  51    -g  General numeric sort (double precision with nan and inf)
  52*/
  53
  54#define FOR_sort
  55#include "toys.h"
  56
GLOBALS(
  char *t;             // -t argument; get_key_data() compares its first byte as the field separator
  struct arg_list *k;  // -k arguments, turned into key_list entries by sort_main()
  // o is the -o output file name opened by sort_main(). T and S are not
  // referenced by the code visible in this file.
  // NOTE(review): the option string lists "S:" like the other argument-taking
  // options, yet S is declared as a plain char rather than a pointer - confirm
  // this is intended.
  char *o, *T, S;

  void *key_list;      // head of the struct sort_key linked list (appended to by add_key())
  int linecount;       // number of lines read so far; incremented by sort_read()
  char **lines;        // array of collected lines; with -c sort_read() stores only the previous line here
)
  66
  67// The sort types are n, g, M, x, and V.
  68// u, c, s, and z apply to top level only, not to keys.
  69// b at top level implies bb.
  70// The remaining options can be applied to sort keys.
  71
// Extra flag bit, beyond the ones generated from the option string, set when
// trailing blanks should be stripped from a key (see get_key_data()).
#define FLAG_bb (1<<31)  // Ignore trailing blanks

// One sort key. sort_main() creates one per -k argument, or a single
// whole-line key (range[0] = 1) when no -k was given.
struct sort_key
{
  struct sort_key *next_key;  // linked list
  // A zero entry means "not specified": get_key_data() treats a zero word
  // number as end of line and skips a zero char offset.
  unsigned range[4];          // start word, start char, end word, end char
  // Option bits given on this key; when zero, compare_keys() falls back to
  // the global toys.optflags.
  int flags;
};
  80
  81// Copy of the part of this string corresponding to a key/flags.
  82
  83static char *get_key_data(char *str, struct sort_key *key, int flags)
  84{
  85  int start=0, end, len, i, j;
  86
  87  // Special case whole string, so we don't have to make a copy
  88
  89  if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
  90    && !(flags&(FLAG_b|FLAG_d|FLAG_i|FLAG_bb))) return str;
  91
  92  // Find start of key on first pass, end on second pass
  93
  94  len = strlen(str);
  95  for (j=0; j<2; j++) {
  96    if (!key->range[2*j]) end=len;
  97
  98    // Loop through fields
  99    else {
 100      end=0;
 101      for (i=1; i < key->range[2*j]+j; i++) {
 102
 103        // Skip leading blanks
 104        if (str[end] && !TT.t) while (isspace(str[end])) end++;
 105
 106        // Skip body of key
 107        for (; str[end]; end++) {
 108          if (TT.t) {
 109            if (str[end]==*TT.t) {
 110              end++;
 111              break;
 112            }
 113          } else if (isspace(str[end])) break;
 114        }
 115      }
 116    }
 117    if (!j) start=end;
 118  }
 119
 120  // Key with explicit separator starts after the separator
 121  if (TT.t && str[start]==*TT.t) start++;
 122
 123  // Strip leading and trailing whitespace if necessary
 124  if (flags&FLAG_b) while (isspace(str[start])) start++;
 125  if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--;
 126
 127  // Handle offsets on start and end
 128  if (key->range[3]) {
 129    end += key->range[3]-1;
 130    if (end>len) end=len;
 131  }
 132  if (key->range[1]) {
 133    start += key->range[1]-1;
 134    if (start>len) start=len;
 135  }
 136
 137  // Make the copy
 138  if (end<start) end=start;
 139  str = xstrndup(str+start, end-start);
 140
 141  // Handle -d
 142  if (flags&FLAG_d) {
 143    for (start = end = 0; str[end]; end++)
 144      if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end];
 145    str[start] = 0;
 146  }
 147
 148  // Handle -i
 149  if (flags&FLAG_i) {
 150    for (start = end = 0; str[end]; end++)
 151      if (isprint(str[end])) str[start++] = str[end];
 152    str[start] = 0;
 153  }
 154
 155  return str;
 156}
 157
 158// append a sort_key to key_list.
 159
 160static struct sort_key *add_key(void)
 161{
 162  void **stupid_compiler = &TT.key_list;
 163  struct sort_key **pkey = (struct sort_key **)stupid_compiler;
 164
 165  while (*pkey) pkey = &((*pkey)->next_key);
 166  return *pkey = xzalloc(sizeof(struct sort_key));
 167}
 168
 169// Perform actual comparison
 170static int compare_values(int flags, char *x, char *y)
 171{
 172  if (CFG_SORT_FLOAT && (flags & FLAG_g)) {
 173    char *xx,*yy;
 174    double dx = strtod(x,&xx), dy = strtod(y,&yy);
 175    int xinf, yinf;
 176
 177    // not numbers < NaN < -infinity < numbers < +infinity
 178
 179    if (x==xx) return y==yy ? 0 : -1;
 180    if (y==yy) return 1;
 181
 182    // Check for isnan
 183    if (dx!=dx) return (dy!=dy) ? 0 : -1;
 184    if (dy!=dy) return 1;
 185
 186    // Check for infinity.  (Could underflow, but avoids needing libm.)
 187    xinf = (1.0/dx == 0.0);
 188    yinf = (1.0/dy == 0.0);
 189    if (xinf) {
 190      if(dx<0) return (yinf && dy<0) ? 0 : -1;
 191      return (yinf && dy>0) ? 0 : 1;
 192    }
 193    if (yinf) return dy<0 ? 1 : -1;
 194
 195    return dx>dy ? 1 : (dx<dy ? -1 : 0);
 196  } else if (flags & FLAG_M) {
 197    struct tm thyme;
 198    int dx;
 199    char *xx,*yy;
 200
 201    xx = strptime(x,"%b",&thyme);
 202    dx = thyme.tm_mon;
 203    yy = strptime(y,"%b",&thyme);
 204    if (!xx) return !yy ? 0 : -1;
 205    else if (!yy) return 1;
 206    else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon;
 207
 208  } else if (flags & FLAG_x) return strtol(x, NULL, 16)-strtol(y, NULL, 16);
 209  else if (flags & FLAG_V) {
 210    while (*x && *y) {
 211      while (*x && *x == *y) x++, y++;
 212      if (isdigit(*x) && isdigit(*y)) {
 213        long long xx = strtoll(x, &x, 10), yy = strtoll(y, &y, 10);
 214
 215        if (xx<yy) return -1;
 216        if (xx>yy) return 1;
 217      } else {
 218        char xx = *x ? *x : x[-1], yy = *y ? *y : y[-1];
 219
 220        // -rc/-pre hack so abc-123 > abc-123-rc1 (other way already - < 0-9)
 221        if (xx != yy) {
 222          if (xx<yy && !strstart(&y, "-rc") && !strstart(&y, "-pre")) return -1;
 223          else return 1;
 224        }
 225      }
 226    }
 227    return *x ? !!*y : -1;
 228  } else if (flags & FLAG_n) {
 229    // Full floating point version of -n
 230    if (CFG_SORT_FLOAT) {
 231      double dx = atof(x), dy = atof(y);
 232
 233      return dx>dy ? 1 : (dx<dy ? -1 : 0);
 234    // Integer version of -n for tiny systems
 235    } else return atoi(x)-atoi(y);
 236
 237  // Ascii sort
 238  } else return ((flags&FLAG_f) ? strcasecmp : strcmp)(x, y);
 239}
 240
 241// Callback from qsort(): Iterate through key_list and perform comparisons.
 242static int compare_keys(const void *xarg, const void *yarg)
 243{
 244  int flags = toys.optflags, retval = 0;
 245  char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg;
 246  struct sort_key *key;
 247
 248  for (key=(struct sort_key *)TT.key_list; !retval && key; key = key->next_key){
 249    flags = key->flags ? key->flags : toys.optflags;
 250
 251    // Chop out and modify key chunks, handling -dfib
 252
 253    x = get_key_data(xx, key, flags);
 254    y = get_key_data(yy, key, flags);
 255
 256    retval = compare_values(flags, x, y);
 257
 258    // Free the copies get_key_data() made.
 259
 260    if (x != xx) free(x);
 261    if (y != yy) free(y);
 262
 263    if (retval) break;
 264  }
 265
 266  // Perform fallback sort if necessary (always case insensitive, no -f,
 267  // the point is to get a stable order even for -f sorts)
 268  if (!retval && !FLAG(s)) {
 269    flags = toys.optflags;
 270    retval = strcmp(xx, yy);
 271  }
 272
 273  return retval * ((flags&FLAG_r) ? -1 : 1);
 274}
 275
 276// Callback from loopfiles to handle input files.
 277static void sort_read(int fd, char *name)
 278{
 279  // Read each line from file, appending to a big array.
 280
 281  for (;;) {
 282    char * line = FLAG(z) ? get_rawline(fd, NULL, 0) : get_line(fd);
 283
 284    if (!line) break;
 285
 286    // handle -c here so we don't allocate more memory than necessary.
 287    if (FLAG(c)) {
 288      int j = FLAG(u) ? -1 : 0;
 289
 290      if (TT.lines && compare_keys((void *)&TT.lines, &line)>j)
 291        error_exit("%s: Check line %d\n", name, TT.linecount);
 292      free(TT.lines);
 293      TT.lines = (char **)line;
 294    } else {
 295      if (!(TT.linecount&63))
 296        TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64));
 297      TT.lines[TT.linecount] = line;
 298    }
 299    TT.linecount++;
 300  }
 301}
 302
 303void sort_main(void)
 304{
 305  int idx, fd = 1;
 306
 307  // Parse -k sort keys.
 308  if (TT.k) {
 309    struct arg_list *arg;
 310
 311    for (arg = TT.k; arg; arg = arg->next) {
 312      struct sort_key *key = add_key();
 313      char *temp;
 314      int flag;
 315
 316      idx = 0;
 317      temp = arg->arg;
 318      while (*temp) {
 319        // Start of range
 320        key->range[2*idx] = (unsigned)strtol(temp, &temp, 10);
 321        if (*temp=='.')
 322          key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10);
 323
 324        // Handle flags appended to a key type.
 325        for (;*temp;temp++) {
 326          char *temp2, *optlist;
 327
 328          // Note that a second comma becomes an "Unknown key" error.
 329
 330          if (*temp==',' && !idx++) {
 331            temp++;
 332            break;
 333          }
 334
 335          // Which flag is this?
 336
 337          optlist = toys.which->options;
 338          temp2 = strchr(optlist, *temp);
 339          flag = (1<<(optlist-temp2+strlen(optlist)-1));
 340
 341          // Was it a flag that can apply to a key?
 342
 343          if (!temp2 || flag>FLAG_x
 344            || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z)))
 345          {
 346            error_exit("Unknown key option.");
 347          }
 348          // b after , means strip _trailing_ space, not leading.
 349          if (idx && flag==FLAG_b) flag = FLAG_bb;
 350          key->flags |= flag;
 351        }
 352      }
 353    }
 354  }
 355
 356  // global b flag strips both leading and trailing spaces
 357  if (FLAG(b)) toys.optflags |= FLAG_bb;
 358
 359  // If no keys, perform alphabetic sort over the whole line.
 360  if (!TT.key_list) add_key()->range[0] = 1;
 361
 362  // Open input files and read data, populating TT.lines[TT.linecount]
 363  loopfiles(toys.optargs, sort_read);
 364
 365  // The compare (-c) logic was handled in sort_read(),
 366  // so if we got here, we're done.
 367  if (FLAG(c)) goto exit_now;
 368
 369  // Perform the actual sort
 370  qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys);
 371
 372  // handle unique (-u)
 373  if (FLAG(u)) {
 374    int jdx;
 375
 376    for (jdx=0, idx=1; idx<TT.linecount; idx++) {
 377      if (!compare_keys(&TT.lines[jdx], &TT.lines[idx]))
 378        free(TT.lines[idx]);
 379      else TT.lines[++jdx] = TT.lines[idx];
 380    }
 381    if (TT.linecount) TT.linecount = jdx+1;
 382  }
 383
 384  // Open output file if necessary. We can't do this until we've finished
 385  // reading in case the output file is one of the input files.
 386  if (TT.o) fd = xcreate(TT.o, O_CREAT|O_TRUNC|O_WRONLY, 0666);
 387
 388  // Output result
 389  for (idx = 0; idx<TT.linecount; idx++) {
 390    char *s = TT.lines[idx];
 391    unsigned i = strlen(s);
 392
 393    if (!FLAG(z)) s[i] = '\n';
 394    xwrite(fd, s, i+1);
 395    if (CFG_TOYBOX_FREE) free(s);
 396  }
 397
 398exit_now:
 399  if (CFG_TOYBOX_FREE) {
 400    if (fd != 1) close(fd);
 401    free(TT.lines);
 402  }
 403}
 404