toybox/toys/pending/expr.c
<<
>>
Prefs
   1/* expr.c - evaluate expression
   2 *
   3 * Copyright 2016 Google Inc.
   4 * Copyright 2013 Daniel Verkamp <daniel@drv.nu>
   5 *
   6 * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/expr.html
   7 *
   8 * The web standard is incomplete (precedence grouping missing), see:
   9 * http://permalink.gmane.org/gmane.comp.standards.posix.austin.general/10141
  10 *
  11 * eval_expr() uses the recursive "Precedence Climbing" algorithm:
  12 *
  13 * Clarke, Keith. "The top-down parsing of expressions." University of London.
  14 * Queen Mary College. Department of Computer Science and Statistics, 1986.
  15 *
  16 * http://www.antlr.org/papers/Clarke-expr-parsing-1986.pdf
  17 *
  18 * Nice explanation and Python implementation:
  19 * http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing
  20
  21USE_EXPR(NEWTOY(expr, NULL, TOYFLAG_USR|TOYFLAG_BIN))
  22
  23config EXPR
  24  bool "expr"
  25  default n
  26  help
  27    usage: expr ARG1 OPERATOR ARG2...
  28
  29    Evaluate expression and print result. For example, "expr 1 + 2".
  30
  31    The supported operators are (grouped from highest to lowest priority):
  32
  33      ( )    :    * / %    + -    != <= < >= > =    &    |
  34
  35    Each constant and operator must be a separate command line argument.
  36    All operators are infix, meaning they expect a constant (or expression
  37    that resolves to a constant) on each side of the operator. Operators of
  38    the same priority (within each group above) are evaluated left to right.
  39    Parentheses may be used (as separate arguments) to elevate the priority
  40    of expressions.
  41
  42    Calling expr from a command shell requires a lot of \( or '*' escaping
  43    to avoid interpreting shell control characters.
  44
  45    The & and | operators are logical (not bitwise) and may operate on
  46    strings (a blank string is "false"). Comparison operators may also
  47    operate on strings (alphabetical sort).
  48
  49    Constants may be strings or integers. Comparison, logical, and regex
  50    operators may operate on strings (a blank string is "false"), other
  51    operators require integers.
  52*/
  53
  54// TODO: int overflow checking
  55
  56#define FOR_expr
  57#include "toys.h"
  58
  59GLOBALS(
  60  char **tok; // current token, not on the stack since recursive calls mutate it
  61
  62  char *refree;
  63)
  64
  65// Scalar value.  If s != NULL, it's a string, otherwise it's an int.
  66struct value {
  67  char *s;
  68  long long i;
  69};
  70
  71// Get the value as a string.
  72char *get_str(struct value *v)
  73{
  74  if (v->s) return v->s;
  75  else return xmprintf("%lld", v->i);
  76}
  77
  78// Get the value as an integer and return 1, or return 0 on error.
  79int get_int(struct value *v, long long *ret)
  80{
  81  if (v->s) {
  82    char *endp;
  83
  84    *ret = strtoll(v->s, &endp, 10);
  85
  86    if (*endp) return 0; // If endp points to NUL, all chars were converted
  87  } else *ret = v->i;
  88
  89  return 1;
  90}
  91
  92// Preserve the invariant that v.s is NULL when the value is an integer.
  93void assign_int(struct value *v, long long i)
  94{
  95  v->i = i;
  96  v->s = NULL;
  97}
  98
  99// Check if v is 0 or the empty string.
 100static int is_false(struct value *v)
 101{
 102  return get_int(v, &v->i) && !v->i;
 103}
 104
 105// 'ret' is filled with a string capture or int match position.
 106static void re(char *target, char *pattern, struct value *ret)
 107{
 108  regex_t pat;
 109  regmatch_t m[2];
 110
 111  xregcomp(&pat, pattern, 0);
 112  // must match at pos 0
 113  if (!regexec(&pat, target, 2, m, 0) && !m[0].rm_so) {
 114    // Return first parenthesized subexpression as string, or length of match
 115    if (pat.re_nsub>0) {
 116      ret->s = xmprintf("%.*s", (int)(m[1].rm_eo-m[1].rm_so),
 117          target+m[1].rm_so);
 118      if (TT.refree) free(TT.refree);
 119      TT.refree = ret->s;
 120    } else assign_int(ret, m[0].rm_eo);
 121  } else {
 122    if (pat.re_nsub>0) ret->s = "";
 123    else assign_int(ret, 0);
 124  }
 125  regfree(&pat);
 126}
 127
 128// 4 different signatures of operators.  S = string, I = int, SI = string or
 129// int.
 130enum { SI_TO_SI = 1, SI_TO_I, I_TO_I, S_TO_SI };
 131
 132enum { OR = 1, AND, EQ, NE, GT, GTE, LT, LTE, ADD, SUB, MUL, DIVI, MOD, RE };
 133
 134// operators grouped by precedence
 135static struct op_def {
 136  char *tok;
 137  char prec, sig, op; // precedence, signature for type coercion, operator ID
 138} OPS[] = {
 139  // logical ops, precedence 1 and 2, signature SI_TO_SI
 140  {"|", 1, SI_TO_SI, OR  },
 141  {"&", 2, SI_TO_SI, AND },
 142  // comparison ops, precedence 3, signature SI_TO_I
 143  {"=", 3, SI_TO_I, EQ }, {"==", 3, SI_TO_I, EQ  }, {"!=", 3, SI_TO_I, NE },
 144  {">", 3, SI_TO_I, GT }, {">=", 3, SI_TO_I, GTE },
 145  {"<", 3, SI_TO_I, LT }, {"<=", 3, SI_TO_I, LTE }, 
 146  // arithmetic ops, precedence 4 and 5, signature I_TO_I
 147  {"+", 4, I_TO_I, ADD }, {"-",  4, I_TO_I, SUB },
 148  {"*", 5, I_TO_I, MUL }, {"/",  5, I_TO_I, DIVI }, {"%", 5, I_TO_I, MOD },
 149  // regex match, precedence 6, signature S_TO_SI
 150  {":", 6, S_TO_SI, RE },
 151  {NULL, 0, 0, 0}, // sentinel
 152};
 153
 154void eval_op(struct op_def *o, struct value *ret, struct value *rhs)
 155{
 156  long long a, b, x = 0; // x = a OP b for ints.
 157  char *s, *t; // string operands
 158  int cmp;
 159
 160  switch (o->sig) {
 161
 162  case SI_TO_SI:
 163    switch (o->op) {
 164    case OR:  if (is_false(ret)) *ret = *rhs; break;
 165    case AND: if (is_false(ret) || is_false(rhs)) assign_int(ret, 0); break;
 166    }
 167    break;  
 168
 169  case SI_TO_I:
 170    if (get_int(ret, &a) && get_int(rhs, &b)) { // both are ints
 171      cmp = a - b;
 172    } else { // otherwise compare both as strings
 173      cmp = strcmp(s = get_str(ret), t = get_str(rhs));
 174      if (ret->s != s) free(s);
 175      if (rhs->s != t) free(t);
 176    }
 177    switch (o->op) {
 178    case EQ:  x = cmp == 0; break;
 179    case NE:  x = cmp != 0; break;
 180    case GT:  x = cmp >  0; break;
 181    case GTE: x = cmp >= 0; break;
 182    case LT:  x = cmp <  0; break;
 183    case LTE: x = cmp <= 0; break;
 184    }
 185    assign_int(ret, x);
 186    break;
 187
 188  case I_TO_I:
 189    if (!get_int(ret, &a) || !get_int(rhs, &b))
 190      error_exit("non-integer argument");
 191    switch (o->op) {
 192    case ADD: x = a + b; break;
 193    case SUB: x = a - b; break;
 194    case MUL: x = a * b; break;
 195    case DIVI: if (b == 0) error_exit("division by zero"); x = a / b; break;
 196    case MOD:  if (b == 0) error_exit("division by zero"); x = a % b; break;
 197    }
 198    assign_int(ret, x);
 199    break;
 200
 201  case S_TO_SI: // op == RE
 202    s = get_str(ret);
 203    cmp = ret->s!=s; // ret overwritten by re so check now
 204    re(s, t = get_str(rhs), ret);
 205    if (cmp) free(s);
 206    if (rhs->s!=t) free(t);
 207    break;
 208  }
 209}
 210
 211// Evalute a compound expression using recursive "Precedence Climbing"
 212// algorithm, setting 'ret'.
 213static void eval_expr(struct value *ret, int min_prec)
 214{
 215  if (!*TT.tok) error_exit("Unexpected end of input");
 216
 217  // Evaluate LHS atom, setting 'ret'.
 218  if (!strcmp(*TT.tok, "(")) { // parenthesized expression
 219    TT.tok++;  // consume (
 220
 221    eval_expr(ret, 1);        // We're inside ( ), so min_prec = 1
 222    if (ret->s && !strcmp(ret->s, ")")) error_exit("empty ( )");
 223    if (!*TT.tok) error_exit("Expected )");
 224    if (strcmp(*TT.tok, ")")) error_exit("Expected ) but got %s", *TT.tok);
 225  } else ret->s = *TT.tok;  // simple literal, all values start as strings
 226  TT.tok++;
 227
 228  // Evaluate RHS and apply operator until precedence is too low.
 229  struct value rhs;
 230  while (*TT.tok) {
 231    struct op_def *o = OPS;
 232
 233    while (o->tok) { // Look up operator
 234      if (!strcmp(*TT.tok, o->tok)) break;
 235      o++;
 236    }
 237    if (!o->tok) break; // Not an operator (extra input will fail later)
 238    if (o->prec < min_prec) break; // Precedence too low, pop a stack frame
 239    TT.tok++;
 240
 241    eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence
 242    eval_op(o, ret, &rhs); // Apply operator, setting 'ret'
 243  }
 244}
 245
 246void expr_main(void)
 247{
 248  struct value ret = {0};
 249
 250  toys.exitval = 2; // if exiting early, indicate error
 251  TT.tok = toys.optargs; // initialize global token
 252  eval_expr(&ret, 1);
 253  if (*TT.tok) error_exit("Unexpected extra input '%s'\n", *TT.tok);
 254
 255  if (ret.s) printf("%s\n", ret.s);
 256  else printf("%lld\n", ret.i);
 257
 258  toys.exitval = is_false(&ret);
 259
 260  if (TT.refree) free(TT.refree);
 261}
 262