busybox/editors/awk.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * awk implementation for busybox
   4 *
   5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
   6 *
   7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
   8 */
   9
  10#include "libbb.h"
  11#include "xregex.h"
  12#include <math.h>
  13
  14/* This is a NOEXEC applet. Be very careful! */
  15
  16
  17#define MAXVARFMT       240
  18#define MINNVBLOCK      64
  19
  20/* variable flags */
  21#define VF_NUMBER       0x0001  /* 1 = primary type is number */
  22#define VF_ARRAY        0x0002  /* 1 = it's an array */
  23
  24#define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
  25#define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
  26#define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
  27#define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
  28#define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
  29#define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
  30#define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
  31
  32/* these flags are static, don't change them when value is changed */
  33#define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
  34
   35/* Variable: one awk value. The VF_* bits in `type` say which of the
    * number/string representations is primary and whether the other one
    * has been cached; `x` is a union whose active member depends on flags
    * such as VF_ARRAY, VF_CHILD and VF_WALK. */
   36typedef struct var_s {
   37        unsigned type;            /* VF_* flags */
   38        double number;            /* numeric value (primary when VF_NUMBER is set) */
   39        char *string;             /* string value; NULL stands for "" (see getvar_s) */
   40        union {
   41                int aidx;               /* func arg idx (for compilation stage) */
   42                struct xhash_s *array;  /* array ptr (valid when VF_ARRAY is set) */
   43                struct var_s *parent;   /* for func args (VF_CHILD), ptr to actual parameter */
   44                char **walker;          /* list of array elements (for..in), see VF_WALK */
   45        } x;
   46} var;
  47
   48/* Node chain (pattern-action chain, BEGIN, END, function bodies):
    * records the first and last node of a sequence plus the name of the
    * program source it belongs to. */
   49typedef struct chain_s {
   50        struct node_s *first;       /* head of the chain */
   51        struct node_s *last;        /* tail of the chain */
   52        const char *programname;    /* presumably used in diagnostics - confirm at use sites */
   53} chain;
  54
   55/* Function: argument count plus the node chain holding its body.
    * Stored in the functions hash (see hash_item.data.f). */
   56typedef struct func_s {
   57        unsigned nargs;             /* number of arguments */
   58        struct chain_s body;        /* function body */
   59} func;
  60
   61/* I/O stream: a stdio FILE with an attached buffer and bookkeeping
    * integers. Stored in the redirect-streams hash (see hash_item.data.rs).
    * NOTE(review): the exact meaning of adv/size/pos is defined by the
    * reader code, which is outside this part of the file. */
   62typedef struct rstream_s {
   63        FILE *F;
   64        char *buffer;
   65        int adv;
   66        int size;
   67        int pos;
   68        smallint is_pipe;       /* presumably nonzero for pipe streams - confirm */
   69} rstream;
  70
   /* One entry of an xhash bucket chain: payload, chain link, and the key.
    * `data` must remain the first member: hash_search() returns &item->data
    * and hash_find() assigns that pointer straight back to a hash_item *. */
   71typedef struct hash_item_s {
   72        union {
   73                struct var_s v;         /* variable/array hash */
   74                struct rstream_s rs;    /* redirect streams hash */
   75                struct func_s f;        /* functions hash */
   76        } data;
   77        struct hash_item_s *next;       /* next in chain */
   78        char name[1];                   /* really it's longer: hash_find() allocates room for the whole key */
   79} hash_item;
  80
   /* Chained hash table keyed by string. Created by hash_init() with
    * FIRST_PRIME buckets and enlarged by hash_rebuild(). */
   81typedef struct xhash_s {
   82        unsigned nel;           /* num of elements */
   83        unsigned csize;         /* current hash size (number of buckets) */
   84        unsigned nprime;        /* next hash size in PRIMES[] (index of the size to use on next growth) */
   85        unsigned glen;          /* summary length of item names (each counted with its NUL) */
   86        struct hash_item_s **items;     /* array of csize bucket heads */
   87} xhash;
  88
   89/* Tree node: an `info` word, the source line number, and three operand
    * slots (l, r, a) whose active union member depends on the node's
    * operation. NOTE(review): `info` presumably carries the same kind of
    * value as the tokeninfo[] entries (class | flags | priority | opcode) -
    * confirm against the parser. */
   90typedef struct node_s {
   91        uint32_t info;
   92        unsigned lineno;
   93        union {
   94                struct node_s *n;
   95                var *v;
   96                int i;
   97                char *s;
   98                regex_t *re;
   99        } l;
  100        union {
  101                struct node_s *n;
  102                regex_t *ire;
  103                func *f;
  104                int argno;
  105        } r;
  106        union {
  107                struct node_s *n;
  108        } a;
  109} node;
 110
  111/* Block of temporary variables. Blocks form a doubly linked list; nvalloc()
    * hands out consecutive entries of nv[] and nvfree() releases them again. */
  112typedef struct nvblock_s {
  113        int size;                   /* capacity of nv[], in vars */
  114        var *pos;                   /* first unused entry of nv[] */
  115        struct nvblock_s *prev;
  116        struct nvblock_s *next;
  117        var nv[0];                  /* storage; nvalloc() allocates size entries after the header */
  118} nvblock;
 119
  /* A node bundled with storage for two compiled regexes; used for the
   * global fsplitter/rsplitter and for exec_builtin__tspl.
   * NOTE(review): the two regex_t slots presumably back the node's
   * l.re / r.ire pointers - confirm at the code that fills them in. */
  120typedef struct tsplitter_s {
  121        node n;
  122        regex_t re[2];
  123} tsplitter;
 124
 125/* simple token classes */
 126/* Order and hex values are very important!!!  See next_token() */
 127#define TC_SEQSTART      1                              /* ( */
 128#define TC_SEQTERM      (1 << 1)                /* ) */
 129#define TC_REGEXP       (1 << 2)                /* /.../ */
 130#define TC_OUTRDR       (1 << 3)                /* | > >> */
 131#define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
 132#define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
 133#define TC_BINOPX       (1 << 6)                /* two-opnd operator */
 134#define TC_IN           (1 << 7)
 135#define TC_COMMA        (1 << 8)
 136#define TC_PIPE         (1 << 9)                /* input redirection pipe */
 137#define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
 138#define TC_ARRTERM      (1 << 11)               /* ] */
 139#define TC_GRPSTART     (1 << 12)               /* { */
 140#define TC_GRPTERM      (1 << 13)               /* } */
 141#define TC_SEMICOL      (1 << 14)
 142#define TC_NEWLINE      (1 << 15)
 143#define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
 144#define TC_WHILE        (1 << 17)
 145#define TC_ELSE         (1 << 18)
 146#define TC_BUILTIN      (1 << 19)
 147#define TC_GETLINE      (1 << 20)
 148#define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
 149#define TC_BEGIN        (1 << 22)
 150#define TC_END          (1 << 23)
 151#define TC_EOF          (1 << 24)
 152#define TC_VARIABLE     (1 << 25)
 153#define TC_ARRAY        (1 << 26)
 154#define TC_FUNCTION     (1 << 27)
 155#define TC_STRING       (1 << 28)
 156#define TC_NUMBER       (1 << 29)
 157
 158#define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
 159
 160/* combined token classes */
 161#define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
 162#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
 163#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
 164                   | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
 165
 166#define TC_STATEMNT (TC_STATX | TC_WHILE)
 167#define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
 168
 169/* word tokens, cannot mean something else if not expected */
 170#define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
 171                   | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
 172
 173/* discard newlines after these */
 174#define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
 175                   | TC_BINOP | TC_OPTERM)
 176
 177/* what can expression begin with */
 178#define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
 179/* what can group begin with */
 180#define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
 181
 182/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
 183/* operator is inserted between them */
 184#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
 185                   | TC_STRING | TC_NUMBER | TC_UOPPOST)
 186#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
 187
 188#define OF_RES1    0x010000
 189#define OF_RES2    0x020000
 190#define OF_STR1    0x040000
 191#define OF_STR2    0x080000
 192#define OF_NUM1    0x100000
 193#define OF_CHECKED 0x200000
 194
 195/* combined operator flags */
 196#define xx      0
 197#define xV      OF_RES2
 198#define xS      (OF_RES2 | OF_STR2)
 199#define Vx      OF_RES1
 200#define VV      (OF_RES1 | OF_RES2)
 201#define Nx      (OF_RES1 | OF_NUM1)
 202#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
 203#define Sx      (OF_RES1 | OF_STR1)
 204#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
 205#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
 206
 207#define OPCLSMASK 0xFF00
 208#define OPNMASK   0x007F
 209
 210/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 211 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 212 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 213 */
 214#define P(x)      (x << 24)
 215#define PRIMASK   0x7F000000
 216#define PRIMASK2  0x7E000000
 217
 218/* Operation classes */
 219
 220#define SHIFT_TIL_THIS  0x0600
 221#define RECUR_FROM_THIS 0x1000
 222
 223enum {
 224        OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
 225        OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
 226
 227        OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
 228        OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
 229        OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
 230
 231        OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
 232        OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
 233        OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
 234        OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
 235        OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
 236        OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
 237        OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
 238        OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
 239        OC_DONE = 0x2800,
 240
 241        ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
 242        ST_WHILE = 0x3300
 243};
 244
 245/* simple builtins */
 246enum {
 247        F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
 248        F_ti,   F_le,   F_sy,   F_ff,   F_cl
 249};
 250
 251/* builtins */
 252enum {
 253        B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
 254        B_ge,   B_gs,   B_su,
 255        B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
 256};
 257
 258/* tokens and their corresponding info values */
 259
 260#define NTC     "\377"  /* switch to next token class (tc<<1) */
 261#define NTCC    '\377'
 262
 263#define OC_B    OC_BUILTIN
 264
  /* Token spellings, grouped by token class in TC_* bit order, starting
   * with TC_SEQSTART. Each entry is one length byte (written as an octal
   * escape) followed by the token text. NTC ("\377") closes the current
   * class and switches to the next one (tc<<1); an empty entry ("\0")
   * ends the table. Entries line up one-to-one with tokeninfo[], so the
   * two tables must be edited together. See next_token(). */
  265static const char tokenlist[] ALIGN1 =
  266        "\1("       NTC
  267        "\1)"       NTC
  268        "\1/"       NTC                                 /* REGEXP */
  269        "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
  270        "\2++"      "\2--"      NTC                     /* UOPPOST */
  271        "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
  272        "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
  273        "\2*="      "\2/="      "\2%="      "\2^="
  274        "\1+"       "\1-"       "\3**="     "\2**"
  275        "\1/"       "\1%"       "\1^"       "\1*"
  276        "\2!="      "\2>="      "\2<="      "\1>"
  277        "\1<"       "\2!~"      "\1~"       "\2&&"
  278        "\2||"      "\1?"       "\1:"       NTC
  279        "\2in"      NTC
  280        "\1,"       NTC
  281        "\1|"       NTC
  282        "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
  283        "\1]"       NTC
  284        "\1{"       NTC
  285        "\1}"       NTC
  286        "\1;"       NTC
  287        "\1\n"      NTC
  288        "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
  289        "\10continue"           "\6delete"  "\5print"
  290        "\6printf"  "\4next"    "\10nextfile"
  291        "\6return"  "\4exit"    NTC
  292        "\5while"   NTC
  293        "\4else"    NTC
  294
  295        "\3and"     "\5compl"   "\6lshift"  "\2or"
  296        "\6rshift"  "\3xor"
  297        "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
  298        "\3cos"     "\3exp"     "\3int"     "\3log"
  299        "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
  300        "\6gensub"  "\4gsub"    "\5index"   "\6length"
  301        "\5match"   "\5split"   "\7sprintf" "\3sub"
  302        "\6substr"  "\7systime" "\10strftime"
  303        "\7tolower" "\7toupper" NTC
  304        "\7getline" NTC
  305        "\4func"    "\10function"   NTC
  306        "\5BEGIN"   NTC
  307        "\3END"     "\0"
  308        ;
 309
  /* Info word for every entry of tokenlist[], in the same order (one value
   * per token; the NTC class separators have no value here). A word ORs
   * together: an operation class (OC_xxx / ST_xxx, selected by OPCLSMASK),
   * operand flags (xV, NV, Sx, ... built from OF_xxx), a P() value in the
   * highest byte (operator priority, or the argument descriptor for
   * builtins), and a low sub-opcode (a character or an F_xx / B_xx index). */
  310static const uint32_t tokeninfo[] = {
  311        0,
  312        0,
  313        OC_REGEXP,
  314        xS|'a',     xS|'w',     xS|'|',
  315        OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
  316        OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
  317            OC_FIELD|xV|P(5),
  318        OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
  319            OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
  320        OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
  321            OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
  322        OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
  323            OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
  324        OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
  325            OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
  326        OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
  327            OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
  328        OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
  329            OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
  330        OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
  331            OC_COLON|xx|P(67)|':',
  332        OC_IN|SV|P(49),
  333        OC_COMMA|SS|P(80),
  334        OC_PGETLINE|SV|P(37),
  335        OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
  336            OC_UNARY|xV|P(19)|'!',
  337        0,
  338        0,
  339        0,
  340        0,
  341        0,
  342        ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
  343        OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
  344        OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
  345        OC_RETURN|Vx,   OC_EXIT|Nx,
  346        ST_WHILE,
  347        0,
  348
  349        OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
  350        OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
  351        OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
  352        OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
  353        OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
  354        OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
  355        OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
  356        OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
  357        OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
  358        OC_GETLINE|SV|P(0),
  359        0,      0,
  360        0,
  361        0
  362};
 363
  364/* Internal variables: the enum below indexes intvar[]; vNames and vValues  */
  365/* list their names and initial values in this same order. In vNames an     */
   /* asterisk marks SPECIAL vars; $ is just no-named Field0.                  */
   /* NUM_INTERNAL_VARS is the count and sizes the intvar[] array.             */
  366enum {
  367        CONVFMT,    OFMT,       FS,         OFS,
  368        ORS,        RS,         RT,         FILENAME,
  369        SUBSEP,     F0,         ARGIND,     ARGC,
  370        ARGV,       ERRNO,      FNR,        NR,
  371        NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
  372};
 373
  /* NUL-separated names of the internal variables, in enum order, ended by
   * an empty name. A '*' placed right after a name's terminating NUL
   * (FS, RS, $, NF, IGNORECASE) is the SPECIAL marker mentioned above.
   * NOTE(review): the code that walks this string is outside this part of
   * the file - confirm the marker handling there before reordering. */
  374static const char vNames[] ALIGN1 =
  375        "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
  376        "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
  377        "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
  378        "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
  379        "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
 380
  /* NUL-separated initial string values for the first internal variables
   * (CONVFMT through F0, in enum order). The list is shorter than vNames
   * and ends with a '\377' byte.
   * NOTE(review): presumably variables after the '\377' marker get a
   * non-string default - confirm in the initialisation code. */
  381static const char vValues[] ALIGN1 =
  382        "%.6g\0"    "%.6g\0"    " \0"       " \0"
  383        "\n\0"      "\n\0"      "\0"        "\0"
  384        "\034\0"    "\0"        "\377";
 385
  386/* hash size may grow to these values: a table starts with FIRST_PRIME */
   /* buckets (hash_init) and hash_rebuild() steps through PRIMES[] in order; */
   /* once the last entry is used the table no longer grows. */
  387#define FIRST_PRIME 61
  388static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
 389
 390
 391/* Globals. Split in two parts so that first one is addressed
 392 * with (mostly short) negative offsets.
 393 * NB: it's unsafe to put members of type "double"
 394 * into globals2 (gcc may fail to align them).
 395 */
 396struct globals {
 397        double t_double;
 398        chain beginseq, mainseq, endseq;
 399        chain *seq;
 400        node *break_ptr, *continue_ptr;
 401        rstream *iF;
 402        xhash *vhash, *ahash, *fdhash, *fnhash;
 403        const char *g_progname;
 404        int g_lineno;
 405        int nfields;
 406        int maxfields; /* used in fsrealloc() only */
 407        var *Fields;
 408        nvblock *g_cb;
 409        char *g_pos;
 410        char *g_buf;
 411        smallint icase;
 412        smallint exiting;
 413        smallint nextrec;
 414        smallint nextfile;
 415        smallint is_f0_split;
 416};
 417struct globals2 {
 418        uint32_t t_info; /* often used */
 419        uint32_t t_tclass;
 420        char *t_string;
 421        int t_lineno;
 422        int t_rollback;
 423
 424        var *intvar[NUM_INTERNAL_VARS]; /* often used */
 425
 426        /* former statics from various functions */
 427        char *split_f0__fstrings;
 428
 429        uint32_t next_token__save_tclass;
 430        uint32_t next_token__save_info;
 431        uint32_t next_token__ltclass;
 432        smallint next_token__concat_inserted;
 433
 434        smallint next_input_file__files_happen;
 435        rstream next_input_file__rsm;
 436
 437        var *evaluate__fnargs;
 438        unsigned evaluate__seed;
 439        regex_t evaluate__sreg;
 440
 441        var ptest__v;
 442
 443        tsplitter exec_builtin__tspl;
 444
 445        /* biggest and least used members go last */
 446        tsplitter fsplitter, rsplitter;
 447};
 448#define G1 (ptr_to_globals[-1])
 449#define G (*(struct globals2 *)ptr_to_globals)
 450/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
 451/*char G1size[sizeof(G1)]; - 0x74 */
 452/*char Gsize[sizeof(G)]; - 0x1c4 */
 453/* Trying to keep most of members accessible with short offsets: */
 454/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
 455#define t_double     (G1.t_double    )
 456#define beginseq     (G1.beginseq    )
 457#define mainseq      (G1.mainseq     )
 458#define endseq       (G1.endseq      )
 459#define seq          (G1.seq         )
 460#define break_ptr    (G1.break_ptr   )
 461#define continue_ptr (G1.continue_ptr)
 462#define iF           (G1.iF          )
 463#define vhash        (G1.vhash       )
 464#define ahash        (G1.ahash       )
 465#define fdhash       (G1.fdhash      )
 466#define fnhash       (G1.fnhash      )
 467#define g_progname   (G1.g_progname  )
 468#define g_lineno     (G1.g_lineno    )
 469#define nfields      (G1.nfields     )
 470#define maxfields    (G1.maxfields   )
 471#define Fields       (G1.Fields      )
 472#define g_cb         (G1.g_cb        )
 473#define g_pos        (G1.g_pos       )
 474#define g_buf        (G1.g_buf       )
 475#define icase        (G1.icase       )
 476#define exiting      (G1.exiting     )
 477#define nextrec      (G1.nextrec     )
 478#define nextfile     (G1.nextfile    )
 479#define is_f0_split  (G1.is_f0_split )
 480#define t_info       (G.t_info      )
 481#define t_tclass     (G.t_tclass    )
 482#define t_string     (G.t_string    )
 483#define t_lineno     (G.t_lineno    )
 484#define t_rollback   (G.t_rollback  )
 485#define intvar       (G.intvar      )
 486#define fsplitter    (G.fsplitter   )
 487#define rsplitter    (G.rsplitter   )
 488#define INIT_G() do { \
 489        SET_PTR_TO_GLOBALS(xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1)); \
 490        G.next_token__ltclass = TC_OPTERM; \
 491        G.evaluate__seed = 1; \
 492} while (0)
 493
 494
 495/* function prototypes */
 496static void handle_special(var *);
 497static node *parse_expr(uint32_t);
 498static void chain_group(void);
 499static var *evaluate(node *, var *);
 500static rstream *next_input_file(void);
 501static int fmt_num(char *, int, const char *, double, int);
 502static int awk_exit(int) NORETURN;
 503
 504/* ---- error handling ---- */
 505
 506static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
 507static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
 508static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
 509static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
 510static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
 511static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
 512static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
 513static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
 514static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
 515#if !ENABLE_FEATURE_AWK_LIBM
 516static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
 517#endif
 518
 519static void zero_out_var(var * vp)
 520{
 521        memset(vp, 0, sizeof(*vp));
 522}
 523
 524static void syntax_error(const char *const message) NORETURN;
 525static void syntax_error(const char *const message)
 526{
 527        bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
 528}
 529
 530/* ---- hash stuff ---- */
 531
 532static unsigned hashidx(const char *name)
 533{
 534        unsigned idx = 0;
 535
 536        while (*name) idx = *name++ + (idx << 6) - idx;
 537        return idx;
 538}
 539
 540/* create new hash */
 541static xhash *hash_init(void)
 542{
 543        xhash *newhash;
 544
 545        newhash = xzalloc(sizeof(xhash));
 546        newhash->csize = FIRST_PRIME;
 547        newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
 548
 549        return newhash;
 550}
 551
 552/* find item in hash, return ptr to data, NULL if not found */
 553static void *hash_search(xhash *hash, const char *name)
 554{
 555        hash_item *hi;
 556
 557        hi = hash->items [ hashidx(name) % hash->csize ];
 558        while (hi) {
 559                if (strcmp(hi->name, name) == 0)
 560                        return &(hi->data);
 561                hi = hi->next;
 562        }
 563        return NULL;
 564}
 565
 566/* grow hash if it becomes too big */
 567static void hash_rebuild(xhash *hash)
 568{
 569        unsigned newsize, i, idx;
 570        hash_item **newitems, *hi, *thi;
 571
 572        if (hash->nprime == ARRAY_SIZE(PRIMES))
 573                return;
 574
 575        newsize = PRIMES[hash->nprime++];
 576        newitems = xzalloc(newsize * sizeof(hash_item *));
 577
 578        for (i = 0; i < hash->csize; i++) {
 579                hi = hash->items[i];
 580                while (hi) {
 581                        thi = hi;
 582                        hi = thi->next;
 583                        idx = hashidx(thi->name) % newsize;
 584                        thi->next = newitems[idx];
 585                        newitems[idx] = thi;
 586                }
 587        }
 588
 589        free(hash->items);
 590        hash->csize = newsize;
 591        hash->items = newitems;
 592}
 593
 594/* find item in hash, add it if necessary. Return ptr to data */
 595static void *hash_find(xhash *hash, const char *name)
 596{
 597        hash_item *hi;
 598        unsigned idx;
 599        int l;
 600
 601        hi = hash_search(hash, name);
 602        if (!hi) {
 603                if (++hash->nel / hash->csize > 10)
 604                        hash_rebuild(hash);
 605
 606                l = strlen(name) + 1;
 607                hi = xzalloc(sizeof(*hi) + l);
 608                strcpy(hi->name, name);
 609
 610                idx = hashidx(name) % hash->csize;
 611                hi->next = hash->items[idx];
 612                hash->items[idx] = hi;
 613                hash->glen += l;
 614        }
 615        return &(hi->data);
 616}
 617
 618#define findvar(hash, name) ((var*)    hash_find((hash), (name)))
 619#define newvar(name)        ((var*)    hash_find(vhash, (name)))
 620#define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
 621#define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
 622
 623static void hash_remove(xhash *hash, const char *name)
 624{
 625        hash_item *hi, **phi;
 626
 627        phi = &(hash->items[hashidx(name) % hash->csize]);
 628        while (*phi) {
 629                hi = *phi;
 630                if (strcmp(hi->name, name) == 0) {
 631                        hash->glen -= (strlen(name) + 1);
 632                        hash->nel--;
 633                        *phi = hi->next;
 634                        free(hi);
 635                        break;
 636                }
 637                phi = &(hi->next);
 638        }
 639}
 640
 641/* ------ some useful functions ------ */
 642
 643static void skip_spaces(char **s)
 644{
 645        char *p = *s;
 646
 647        while (1) {
 648                if (*p == '\\' && p[1] == '\n') {
 649                        p++;
 650                        t_lineno++;
 651                } else if (*p != ' ' && *p != '\t') {
 652                        break;
 653                }
 654                p++;
 655        }
 656        *s = p;
 657}
 658
 659static char *nextword(char **s)
 660{
 661        char *p = *s;
 662
 663        while (*(*s)++) /* */;
 664
 665        return p;
 666}
 667
 668static char nextchar(char **s)
 669{
 670        char c, *pps;
 671
 672        c = *((*s)++);
 673        pps = *s;
 674        if (c == '\\') c = bb_process_escape_sequence((const char**)s);
 675        if (c == '\\' && *s == pps) c = *((*s)++);
 676        return c;
 677}
 678
 679static ALWAYS_INLINE int isalnum_(int c)
 680{
 681        return (isalnum(c) || c == '_');
 682}
 683
 684static double my_strtod(char **pp)
 685{
 686#if ENABLE_DESKTOP
 687        if ((*pp)[0] == '0'
 688         && ((((*pp)[1] | 0x20) == 'x') || isdigit((*pp)[1]))
 689        ) {
 690                return strtoull(*pp, pp, 0);
 691        }
 692#endif
 693        return strtod(*pp, pp);
 694}
 695
 696/* -------- working with variables (set/get/copy/etc) -------- */
 697
 698static xhash *iamarray(var *v)
 699{
 700        var *a = v;
 701
 702        while (a->type & VF_CHILD)
 703                a = a->x.parent;
 704
 705        if (!(a->type & VF_ARRAY)) {
 706                a->type |= VF_ARRAY;
 707                a->x.array = hash_init();
 708        }
 709        return a->x.array;
 710}
 711
 712static void clear_array(xhash *array)
 713{
 714        unsigned i;
 715        hash_item *hi, *thi;
 716
 717        for (i = 0; i < array->csize; i++) {
 718                hi = array->items[i];
 719                while (hi) {
 720                        thi = hi;
 721                        hi = hi->next;
 722                        free(thi->data.v.string);
 723                        free(thi);
 724                }
 725                array->items[i] = NULL;
 726        }
 727        array->glen = array->nel = 0;
 728}
 729
 730/* clear a variable */
 731static var *clrvar(var *v)
 732{
 733        if (!(v->type & VF_FSTR))
 734                free(v->string);
 735
 736        v->type &= VF_DONTTOUCH;
 737        v->type |= VF_DIRTY;
 738        v->string = NULL;
 739        return v;
 740}
 741
 742/* assign string value to variable */
 743static var *setvar_p(var *v, char *value)
 744{
 745        clrvar(v);
 746        v->string = value;
 747        handle_special(v);
 748        return v;
 749}
 750
 751/* same as setvar_p but make a copy of string */
 752static var *setvar_s(var *v, const char *value)
 753{
 754        return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
 755}
 756
 757/* same as setvar_s but set USER flag */
 758static var *setvar_u(var *v, const char *value)
 759{
 760        setvar_s(v, value);
 761        v->type |= VF_USER;
 762        return v;
 763}
 764
 765/* set array element to user string */
 766static void setari_u(var *a, int idx, const char *s)
 767{
 768        char sidx[sizeof(int)*3 + 1];
 769        var *v;
 770
 771        sprintf(sidx, "%d", idx);
 772        v = findvar(iamarray(a), sidx);
 773        setvar_u(v, s);
 774}
 775
 776/* assign numeric value to variable */
 777static var *setvar_i(var *v, double value)
 778{
 779        clrvar(v);
 780        v->type |= VF_NUMBER;
 781        v->number = value;
 782        handle_special(v);
 783        return v;
 784}
 785
 786static const char *getvar_s(var *v)
 787{
 788        /* if v is numeric and has no cached string, convert it to string */
 789        if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
 790                fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
 791                v->string = xstrdup(g_buf);
 792                v->type |= VF_CACHED;
 793        }
 794        return (v->string == NULL) ? "" : v->string;
 795}
 796
 797static double getvar_i(var *v)
 798{
 799        char *s;
 800
 801        if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
 802                v->number = 0;
 803                s = v->string;
 804                if (s && *s) {
 805                        v->number = my_strtod(&s);
 806                        if (v->type & VF_USER) {
 807                                skip_spaces(&s);
 808                                if (*s != '\0')
 809                                        v->type &= ~VF_USER;
 810                        }
 811                } else {
 812                        v->type &= ~VF_USER;
 813                }
 814                v->type |= VF_CACHED;
 815        }
 816        return v->number;
 817}
 818
 819/* Used for operands of bitwise ops */
 820static unsigned long getvar_i_int(var *v)
 821{
 822        double d = getvar_i(v);
 823
 824        /* Casting doubles to longs is undefined for values outside
 825         * of target type range. Try to widen it as much as possible */
 826        if (d >= 0)
 827                return (unsigned long)d;
 828        /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
 829        return - (long) (unsigned long) (-d);
 830}
 831
 832static var *copyvar(var *dest, const var *src)
 833{
 834        if (dest != src) {
 835                clrvar(dest);
 836                dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
 837                dest->number = src->number;
 838                if (src->string)
 839                        dest->string = xstrdup(src->string);
 840        }
 841        handle_special(dest);
 842        return dest;
 843}
 844
 845static var *incvar(var *v)
 846{
 847        return setvar_i(v, getvar_i(v) + 1.);
 848}
 849
 850/* return true if v is number or numeric string */
 851static int is_numeric(var *v)
 852{
 853        getvar_i(v);
 854        return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
 855}
 856
 857/* return 1 when value of v corresponds to true, 0 otherwise */
 858static int istrue(var *v)
 859{
 860        if (is_numeric(v))
 861                return (v->number == 0) ? 0 : 1;
 862        return (v->string && *(v->string)) ? 1 : 0;
 863}
 864
 865/* temporary variables allocator. Last allocated should be first freed */
 866static var *nvalloc(int n)
 867{
 868        nvblock *pb = NULL;
 869        var *v, *r;
 870        int size;
 871
 872        while (g_cb) {
 873                pb = g_cb;
 874                if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
 875                g_cb = g_cb->next;
 876        }
 877
 878        if (!g_cb) {
 879                size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
 880                g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
 881                g_cb->size = size;
 882                g_cb->pos = g_cb->nv;
 883                g_cb->prev = pb;
 884                /*g_cb->next = NULL; - xzalloc did it */
 885                if (pb) pb->next = g_cb;
 886        }
 887
 888        v = r = g_cb->pos;
 889        g_cb->pos += n;
 890
 891        while (v < g_cb->pos) {
 892                v->type = 0;
 893                v->string = NULL;
 894                v++;
 895        }
 896
 897        return r;
 898}
 899
 900static void nvfree(var *v)
 901{
 902        var *p;
 903
 904        if (v < g_cb->nv || v >= g_cb->pos)
 905                syntax_error(EMSG_INTERNAL_ERROR);
 906
 907        for (p = v; p < g_cb->pos; p++) {
 908                if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
 909                        clear_array(iamarray(p));
 910                        free(p->x.array->items);
 911                        free(p->x.array);
 912                }
 913                if (p->type & VF_WALK)
 914                        free(p->x.walker);
 915
 916                clrvar(p);
 917        }
 918
 919        g_cb->pos = v;
 920        while (g_cb->prev && g_cb->pos == g_cb->nv) {
 921                g_cb = g_cb->prev;
 922        }
 923}
 924
 925/* ------- awk program text parsing ------- */
 926
 927/* Parse the next token starting at the global g_pos and leave the result
 928 * in the t_* globals (t_tclass, t_info, t_string, t_double, t_lineno).
 929 *
     * expected: mask of the token classes the caller is prepared to accept.
     * If the class of the token is not in the mask, syntax_error() is called.
     *
     * A token pushed back by rollback_token(), or one held back because a
     * concatenation operator was inserted in front of it, is handed out
     * without reading any new input.
     *
     * Returns the class of the token.
     */
 930static uint32_t next_token(uint32_t expected)
 931{
    /* State kept in G so that it survives from one call to the next */
 932#define concat_inserted (G.next_token__concat_inserted)
 933#define save_tclass     (G.next_token__save_tclass)
 934#define save_info       (G.next_token__save_info)
 935/* Initialized to TC_OPTERM: */
 936#define ltclass         (G.next_token__ltclass)
 937
 938        char *p, *pp, *s;
 939        const char *tl;
 940        uint32_t tc;
 941        const uint32_t *ti;
 942        int l;
 943
 944        if (t_rollback) {
                    /* token was pushed back: t_tclass still describes it */
 945                t_rollback = FALSE;
 946
 947        } else if (concat_inserted) {
                    /* the previous call returned a synthetic concatenation
                     * operator; now deliver the token that was held back */
 948                concat_inserted = FALSE;
 949                t_tclass = save_tclass;
 950                t_info = save_info;
 951
 952        } else {
 953                p = g_pos;
 954 readnext:
 955                skip_spaces(&p);
 956                g_lineno = t_lineno;
                    /* a '#' comment runs up to (not including) newline or NUL */
 957                if (*p == '#')
 958                        while (*p != '\n' && *p != '\0')
 959                                p++;
 960
 961                if (*p == '\n')
 962                        t_lineno++;
 963
 964                if (*p == '\0') {
 965                        tc = TC_EOF;
 966
 967                } else if (*p == '\"') {
 968                        /* it's a string */
                            /* The decoded text is written back over the source,
                             * starting right after the opening quote; each char
                             * comes from nextchar(). NUL or newline before the
                             * closing quote is an error. */
 969                        t_string = s = ++p;
 970                        while (*p != '\"') {
 971                                if (*p == '\0' || *p == '\n')
 972                                        syntax_error(EMSG_UNEXP_EOS);
 973                                *(s++) = nextchar(&p);
 974                        }
 975                        p++;
 976                        *s = '\0';
 977                        tc = TC_STRING;
 978
 979                } else if ((expected & TC_REGEXP) && *p == '/') {
 980                        /* it's regexp */
                            /* '/' starts a regexp only when the caller accepts
                             * one. The text is copied in place up to the closing
                             * '/'. After a backslash, the result of
                             * bb_process_escape_sequence() replaces the copied
                             * backslash; if the escaped char was itself a
                             * backslash a second one is stored, and if the helper
                             * consumed nothing the next char is copied as is. */
 981                        t_string = s = ++p;
 982                        while (*p != '/') {
 983                                if (*p == '\0' || *p == '\n')
 984                                        syntax_error(EMSG_UNEXP_EOS);
 985                                *s = *p++;
 986                                if (*s++ == '\\') {
 987                                        pp = p;
 988                                        *(s-1) = bb_process_escape_sequence((const char **)&p);
 989                                        if (*pp == '\\')
 990                                                *s++ = '\\';
 991                                        if (p == pp)
 992                                                *s++ = *p++;
 993                                }
 994                        }
 995                        p++;
 996                        *s = '\0';
 997                        tc = TC_REGEXP;
 998
 999                } else if (*p == '.' || isdigit(*p)) {
1000                        /* it's a number */
1001                        t_double = my_strtod(&p);
                            /* a '.' directly after the parsed number is rejected */
1002                        if (*p == '.')
1003                                syntax_error(EMSG_UNEXP_TOKEN);
1004                        tc = TC_NUMBER;
1005
1006                } else {
1007                        /* search for something known */
                            /* tokenlist is read as a run of entries, each a
                             * length byte followed by that many chars of token
                             * text. A length byte equal to NTCC carries no text
                             * and moves tc on to the next class bit. ti steps
                             * through tokeninfo in parallel with the text
                             * entries. */
1008                        tl = tokenlist;
1009                        tc = 0x00000001;
1010                        ti = tokeninfo;
1011                        while (*tl) {
1012                                l = *(tl++);
1013                                if (l == NTCC) {
1014                                        tc <<= 1;
1015                                        continue;
1016                                }
1017                                /* if token class is expected, token
1018                                 * matches and it's not a longer word,
1019                                 * then this is what we are looking for
1020                                 */
                                    /* (TC_WORD and TC_NEWLINE classes are tried even
                                     * when the caller did not list them) */
1021                                if ((tc & (expected | TC_WORD | TC_NEWLINE))
1022                                 && *tl == *p && strncmp(p, tl, l) == 0
1023                                 && !((tc & TC_WORD) && isalnum_(p[l]))
1024                                ) {
1025                                        t_info = *ti;
1026                                        p += l;
1027                                        break;
1028                                }
1029                                ti++;
1030                                tl += l;
1031                        }
1032
                            /* *tl is NUL only when the list ended without a match */
1033                        if (!*tl) {
1034                                /* it's a name (var/array/function),
1035                                 * otherwise it's something wrong
1036                                 */
1037                                if (!isalnum_(*p))
1038                                        syntax_error(EMSG_UNEXP_TOKEN);
1039
                                    /* Shift the name one byte to the left, in place,
                                     * so it can be NUL-terminated without clobbering
                                     * the char that follows it. The byte just before
                                     * the name is overwritten. */
1040                                t_string = --p;
1041                                while (isalnum_(*(++p))) {
1042                                        *(p-1) = *p;
1043                                }
1044                                *(p-1) = '\0';
1045                                tc = TC_VARIABLE;
1046                                /* also consume whitespace between functionname and bracket */
1047                                if (!(expected & TC_VARIABLE))
1048                                        skip_spaces(&p);
                                    /* '(' is left unconsumed; '[' is consumed */
1049                                if (*p == '(') {
1050                                        tc = TC_FUNCTION;
1051                                } else {
1052                                        if (*p == '[') {
1053                                                p++;
1054                                                tc = TC_ARRAY;
1055                                        }
1056                                }
1057                        }
1058                }
1059                g_pos = p;
1060
1061                /* skipping newlines in some cases */
                    /* (when the previously returned token has a TC_NOTERM class) */
1062                if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1063                        goto readnext;
1064
1065                /* insert concatenation operator when needed */
                    /* The real token is saved and a synthetic OC_CONCAT binary
                     * operator is returned instead; the saved token comes out
                     * on the next call. */
1066                if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1067                        concat_inserted = TRUE;
1068                        save_tclass = tc;
1069                        save_info = t_info;
1070                        tc = TC_BINOP;
1071                        t_info = OC_CONCAT | SS | P(35);
1072                }
1073
1074                t_tclass = tc;
1075        }
            /* remember the class just handed out for the next call */
1076        ltclass = t_tclass;
1077
1078        /* Are we ready for this? */
1079        if (!(ltclass & expected))
1080                syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1081                                EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1082
1083        return ltclass;
1084#undef concat_inserted
1085#undef save_tclass
1086#undef save_info
1087#undef ltclass
1088}
1089
1090static void rollback_token(void)
1091{
            /* Push the current token back: next_token() checks this flag,
             * clears it, and hands out the same token again instead of
             * reading a new one. */
1092        t_rollback = TRUE;
1093}
1094
1095static node *new_node(uint32_t info)
1096{
1097        node *n;
1098
1099        n = xzalloc(sizeof(node));
1100        n->info = info;
1101        n->lineno = g_lineno;
1102        return n;
1103}
1104
1105static node *mk_re_node(const char *s, node *n, regex_t *re)
1106{
1107        n->info = OC_REGEXP;
1108        n->l.re = re;
1109        n->r.ire = re + 1;
1110        xregcomp(re, s, REG_EXTENDED);
1111        xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1112
1113        return n;
1114}
1115
1116static node *condition(void)
1117{
1118        next_token(TC_SEQSTART);
1119        return parse_expr(TC_SEQTERM);
1120}
1121
1122/* Parse an expression that ends with a token of class iexp and return a
1123 * pointer to the built subtree. The terminator is eaten by parse_expr.
     *
     * The tree grows below the stack node sn: r.n points to the (right)
     * child and a.n is used as the link back to the parent. cn is the node
     * added last, xtc the mask of token classes acceptable next. */
1124static node *parse_expr(uint32_t iexp)
1125{
1126        node sn;
1127        node *cn = &sn;
1128        node *vn, *glptr;
1129        uint32_t tc, xtc;
1130        var *v;
1131
            /* sn is a temporary root; NOTE(review): its info of PRIMASK
             * presumably keeps the priority climb below from passing it */
1132        sn.info = PRIMASK;
1133        sn.r.n = glptr = NULL;
1134        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1135
1136        while (!((tc = next_token(xtc)) & iexp)) {
                    /* glptr is non-NULL only right after a getline token */
1137                if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1138                        /* input redirection (<) attached to glptr node */
1139                        cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1140                        cn->a.n = glptr;
1141                        xtc = TC_OPERAND | TC_UOPPRE;
1142                        glptr = NULL;
1143
1144                } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1145                        /* for binary and postfix-unary operators, jump back over
1146                         * previous operators with higher priority */
                            /* Climb while the new operator's priority field is
                             * greater than the parent's, or while it equals an
                             * OC_COLON on vn itself. */
1147                        vn = cn;
1148                        while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1149                         || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1150                                vn = vn->a.n;
1151                        if ((t_info & OPCLSMASK) == OC_TERNARY)
1152                                t_info += P(6);
                            /* the new node takes vn's place under vn's parent;
                             * vn becomes its operand below */
1153                        cn = vn->a.n->r.n = new_node(t_info);
1154                        cn->a.n = vn->a.n;
1155                        if (tc & TC_BINOP) {
1156                                cn->l.n = vn;
1157                                xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1158                                if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1159                                        /* it's a pipe */
1160                                        next_token(TC_GETLINE);
1161                                        /* give maximum priority to this pipe */
1162                                        cn->info &= ~PRIMASK;
1163                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1164                                }
1165                        } else {
                                    /* postfix operator: operand goes to the right link */
1166                                cn->r.n = vn;
1167                                xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1168                        }
1169                        vn->a.n = cn;
1170
1171                } else {
1172                        /* for operands and prefix-unary operators, attach them
1173                         * to last node */
1174                        vn = cn;
1175                        cn = vn->r.n = new_node(t_info);
1176                        cn->a.n = vn;
1177                        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1178                        if (tc & (TC_OPERAND | TC_REGEXP)) {
1179                                xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1180                                /* one should be very careful with switch on tclass -
1181                                 * only simple tclasses should be used! */
1182                                switch (tc) {
1183                                case TC_VARIABLE:
1184                                case TC_ARRAY:
                                            /* A name found in ahash becomes a function
                                             * argument reference (by index); any other
                                             * name goes through newvar(). */
1185                                        cn->info = OC_VAR;
1186                                        v = hash_search(ahash, t_string);
1187                                        if (v != NULL) {
1188                                                cn->info = OC_FNARG;
1189                                                cn->l.i = v->x.aidx;
1190                                        } else {
1191                                                cn->l.v = newvar(t_string);
1192                                        }
1193                                        if (tc & TC_ARRAY) {
                                                    /* subscript expression, up to TC_ARRTERM */
1194                                                cn->info |= xS;
1195                                                cn->r.n = parse_expr(TC_ARRTERM);
1196                                        }
1197                                        break;
1198
1199                                case TC_NUMBER:
1200                                case TC_STRING:
                                            /* a literal gets its own zero-allocated var */
1201                                        cn->info = OC_VAR;
1202                                        v = cn->l.v = xzalloc(sizeof(var));
1203                                        if (tc & TC_NUMBER)
1204                                                setvar_i(v, t_double);
1205                                        else
1206                                                setvar_s(v, t_string);
1207                                        break;
1208
1209                                case TC_REGEXP:
                                            /* room for two regex_t, as mk_re_node() uses re and re + 1 */
1210                                        mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1211                                        break;
1212
1213                                case TC_FUNCTION:
1214                                        cn->info = OC_FUNC;
1215                                        cn->r.f = newfunc(t_string);
1216                                        cn->l.n = condition();
1217                                        break;
1218
1219                                case TC_SEQSTART:
                                            /* the node created above is replaced by the
                                             * subtree of the nested expression */
1220                                        cn = vn->r.n = parse_expr(TC_SEQTERM);
1221                                        cn->a.n = vn;
1222                                        break;
1223
1224                                case TC_GETLINE:
                                            /* remember it: a following '<' is a redirection */
1225                                        glptr = cn;
1226                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1227                                        break;
1228
1229                                case TC_BUILTIN:
1230                                        cn->l.n = condition();
1231                                        break;
1232                                }
1233                        }
1234                }
1235        }
            /* NULL when the terminator came first (empty expression) */
1236        return sn.r.n;
1237}
1238
1239/* add node to chain. Return ptr to alloc'd node */
1240static node *chain_node(uint32_t info)
1241{
1242        node *n;
1243
1244        if (!seq->first)
1245                seq->first = seq->last = new_node(0);
1246
1247        if (seq->programname != g_progname) {
1248                seq->programname = g_progname;
1249                n = chain_node(OC_NEWSOURCE);
1250                n->l.s = xstrdup(g_progname);
1251        }
1252
1253        n = seq->last;
1254        n->info = info;
1255        seq->last = n->a.n = new_node(OC_DONE);
1256
1257        return n;
1258}
1259
1260static void chain_expr(uint32_t info)
1261{
1262        node *n;
1263
1264        n = chain_node(info);
1265        n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1266        if (t_tclass & TC_GRPTERM)
1267                rollback_token();
1268}
1269
1270static node *chain_loop(node *nn)
1271{
1272        node *n, *n2, *save_brk, *save_cont;
1273
1274        save_brk = break_ptr;
1275        save_cont = continue_ptr;
1276
1277        n = chain_node(OC_BR | Vx);
1278        continue_ptr = new_node(OC_EXEC);
1279        break_ptr = new_node(OC_EXEC);
1280        chain_group();
1281        n2 = chain_node(OC_EXEC | Vx);
1282        n2->l.n = nn;
1283        n2->a.n = n;
1284        continue_ptr->a.n = n2;
1285        break_ptr->a.n = n->r.n = seq->last;
1286
1287        continue_ptr = save_cont;
1288        break_ptr = save_brk;
1289
1290        return n;
1291}
1292
1293/* Parse one group - a braced block, a plain expression statement or a
     * control statement - and attach the resulting nodes to the chain seq */
1294static void chain_group(void)
1295{
1296        uint32_t c;
1297        node *n, *n2, *n3;
1298
            /* skip newlines in front of the group */
1299        do {
1300                c = next_token(TC_GRPSEQ);
1301        } while (c & TC_NEWLINE);
1302
1303        if (c & TC_GRPSTART) {
                    /* block: chain nested groups until the group terminator */
1304                while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1305                        if (t_tclass & TC_NEWLINE) continue;
1306                        rollback_token();
1307                        chain_group();
1308                }
1309        } else if (c & (TC_OPSEQ | TC_OPTERM)) {
                    /* expression statement: re-read the token as part of it */
1310                rollback_token();
1311                chain_expr(OC_EXEC | Vx);
1312        } else {                                                /* TC_STATEMNT */
1313                switch (t_info & OPCLSMASK) {
1314                case ST_IF:
                            /* n: branch on the condition; n2: node chained after
                             * the "then" group. n->r.n is set to the node that
                             * follows n2. With an "else", n2's successor is moved
                             * past the else group. */
1315                        n = chain_node(OC_BR | Vx);
1316                        n->l.n = condition();
1317                        chain_group();
1318                        n2 = chain_node(OC_EXEC);
1319                        n->r.n = seq->last;
1320                        if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1321                                chain_group();
1322                                n2->a.n = seq->last;
1323                        } else {
1324                                rollback_token();
1325                        }
1326                        break;
1327
1328                case ST_WHILE:
                            /* condition is parsed first, then hung on the loop head */
1329                        n2 = condition();
1330                        n = chain_loop(NULL);
1331                        n->l.n = n2;
1332                        break;
1333
1334                case ST_DO:
                            /* n2 precedes the loop head and is linked to the head's
                             * successor, so the first pass does not go through
                             * the head; the condition follows the body */
1335                        n2 = chain_node(OC_EXEC);
1336                        n = chain_loop(NULL);
1337                        n2->a.n = n->a.n;
1338                        next_token(TC_WHILE);
1339                        n->l.n = condition();
1340                        break;
1341
1342                case ST_FOR:
1343                        next_token(TC_SEQSTART);
1344                        n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
                            /* the terminator of the first expression tells the
                             * two forms of "for" apart */
1345                        if (t_tclass & TC_SEQTERM) {    /* for-in */
1346                                if ((n2->info & OPCLSMASK) != OC_IN)
1347                                        syntax_error(EMSG_UNEXP_TOKEN);
                                    /* WALKINIT gets both operands of the "in" node;
                                     * the loop head becomes WALKNEXT on its left
                                     * operand */
1348                                n = chain_node(OC_WALKINIT | VV);
1349                                n->l.n = n2->l.n;
1350                                n->r.n = n2->r.n;
1351                                n = chain_loop(NULL);
1352                                n->info = OC_WALKNEXT | Vx;
1353                                n->l.n = n2->l.n;
1354                        } else {                        /* for (;;) */
                                    /* first expression runs once before the loop; the
                                     * second is the loop condition; the third (n3) is
                                     * handed to chain_loop() for the loop's tail node */
1355                                n = chain_node(OC_EXEC | Vx);
1356                                n->l.n = n2;
1357                                n2 = parse_expr(TC_SEMICOL);
1358                                n3 = parse_expr(TC_SEQTERM);
1359                                n = chain_loop(n3);
1360                                n->l.n = n2;
                                    /* no condition: the head becomes a plain OC_EXEC */
1361                                if (!n2)
1362                                        n->info = OC_EXEC;
1363                        }
1364                        break;
1365
1366                case OC_PRINT:
1367                case OC_PRINTF:
1368                        n = chain_node(t_info);
1369                        n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
                            /* output redirection: its info bits are merged into the
                             * node and the target expression goes to the right link */
1370                        if (t_tclass & TC_OUTRDR) {
1371                                n->info |= t_info;
1372                                n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1373                        }
1374                        if (t_tclass & TC_GRPTERM)
1375                                rollback_token();
1376                        break;
1377
1378                case OC_BREAK:
                            /* successor is the break node of the loop being parsed */
1379                        n = chain_node(OC_EXEC);
1380                        n->a.n = break_ptr;
1381                        break;
1382
1383                case OC_CONTINUE:
                            /* successor is the continue node of the loop being parsed */
1384                        n = chain_node(OC_EXEC);
1385                        n->a.n = continue_ptr;
1386                        break;
1387
1388                /* delete, next, nextfile, return, exit */
1389                default:
1390                        chain_expr(t_info);
1391                }
1392        }
1393}
1394
1395static void parse_program(char *p)
1396{
1397        uint32_t tclass;
1398        node *cn;
1399        func *f;
1400        var *v;
1401
1402        g_pos = p;
1403        t_lineno = 1;
1404        while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1405                        TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1406
1407                if (tclass & TC_OPTERM)
1408                        continue;
1409
1410                seq = &mainseq;
1411                if (tclass & TC_BEGIN) {
1412                        seq = &beginseq;
1413                        chain_group();
1414
1415                } else if (tclass & TC_END) {
1416                        seq = &endseq;
1417                        chain_group();
1418
1419                } else if (tclass & TC_FUNCDECL) {
1420                        next_token(TC_FUNCTION);
1421                        g_pos++;
1422                        f = newfunc(t_string);
1423                        f->body.first = NULL;
1424                        f->nargs = 0;
1425                        while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1426                                v = findvar(ahash, t_string);
1427                                v->x.aidx = (f->nargs)++;
1428
1429                                if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1430                                        break;
1431                        }
1432                        seq = &(f->body);
1433                        chain_group();
1434                        clear_array(ahash);
1435
1436                } else if (tclass & TC_OPSEQ) {
1437                        rollback_token();
1438                        cn = chain_node(OC_TEST);
1439                        cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1440                        if (t_tclass & TC_GRPSTART) {
1441                                rollback_token();
1442                                chain_group();
1443                        } else {
1444                                chain_node(OC_PRINT);
1445                        }
1446                        cn->r.n = mainseq.last;
1447
1448                } else /* if (tclass & TC_GRPSTART) */ {
1449                        rollback_token();
1450                        chain_group();
1451                }
1452        }
1453}
1454
1455
1456/* -------- program execution part -------- */
1457
1458static node *mk_splitter(const char *s, tsplitter *spl)
1459{
1460        regex_t *re, *ire;
1461        node *n;
1462
1463        re = &spl->re[0];
1464        ire = &spl->re[1];
1465        n = &spl->n;
1466        if ((n->info & OPCLSMASK) == OC_REGEXP) {
1467                regfree(re);
1468                regfree(ire); // TODO: nuke ire, use re+1?
1469        }
1470        if (strlen(s) > 1) {
1471                mk_re_node(s, n, re);
1472        } else {
1473                n->info = (uint32_t) *s;
1474        }
1475
1476        return n;
1477}
1478
1479/* use node as a regular expression. Supplied with node ptr and regex_t
1480 * storage space. Return ptr to regex (if result points to preg, it should
1481 * be later regfree'd manually
1482 */
1483static regex_t *as_regex(node *op, regex_t *preg)
1484{
1485        int cflags;
1486        var *v;
1487        const char *s;
1488
1489        if ((op->info & OPCLSMASK) == OC_REGEXP) {
1490                return icase ? op->r.ire : op->l.re;
1491        }
1492        v = nvalloc(1);
1493        s = getvar_s(evaluate(op, v));
1494
1495        cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1496        /* Testcase where REG_EXTENDED fails (unpaired '{'):
1497         * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1498         * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1499         * (maybe gsub is not supposed to use REG_EXTENDED?).
1500         */
1501        if (regcomp(preg, s, cflags)) {
1502                cflags &= ~REG_EXTENDED;
1503                xregcomp(preg, s, cflags);
1504        }
1505        nvfree(v);
1506        return preg;
1507}
1508
/* Gradually growing buffer: make sure *b can be indexed at n.
 * When *b is NULL or n is not below *size, the buffer is reallocated to
 * n + n/2 + 80 bytes and *size is updated; otherwise nothing changes.
 * *size is not read while *b is NULL. */
static void qrealloc(char **b, int n, int *size)
{
        int grown;

        if (*b != NULL && n < *size)
                return;

        grown = n + (n >> 1) + 80;
        *size = grown;
        *b = xrealloc(*b, grown);
}
1517
1518/* resize field storage space */
1519static void fsrealloc(int size)
1520{
1521        int i;
1522
1523        if (size >= maxfields) {
1524                i = maxfields;
1525                maxfields = size + 16;
1526                Fields = xrealloc(Fields, maxfields * sizeof(var));
1527                for (; i < maxfields; i++) {
1528                        Fields[i].type = VF_SPECIAL;
1529                        Fields[i].string = NULL;
1530                }
1531        }
1532
1533        if (size < nfields) {
1534                for (i = size; i < nfields; i++) {
1535                        clrvar(Fields + i);
1536                }
1537        }
1538        nfields = size;
1539}
1540
1541static int awk_split(const char *s, node *spl, char **slist)
1542{
            /* Split string s into fields as directed by splitter node spl.
             * *slist receives a newly allocated buffer holding the fields one
             * after another, each NUL-terminated. Returns the number of fields.
             *
             * Four modes, chosen from spl->info: regexp split, null split
             * (separator char is NUL: every char is a field), single-character
             * split, and space split (separator ' ': fields are runs of
             * non-whitespace). */
1543        int l, n = 0;
1544        char c[4];
1545        char *s1;
1546        regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1547
1548        /* in worst case, each char would be a separate field */
1549        *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1550        strcpy(s1, s);
1551
            /* c[0],c[1]: separator char (kept twice so that the icase code
             * below can store both cases); c[2]: '\n' when RS is the empty
             * string, else NUL; c[3]: terminator. c is used as a strpbrk()
             * set, c+2 as a strcspn() set. */
1552        c[0] = c[1] = (char)spl->info;
1553        c[2] = c[3] = '\0';
1554        if (*getvar_s(intvar[RS]) == '\0')
1555                c[2] = '\n';
1556
1557        if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1558                if (!*s)
1559                        return n; /* "": zero fields */
1560                n++; /* at least one field will be there */
1561                do {
1562                        l = strcspn(s, c+2); /* len till next NUL or \n */
                            /* a match counts only if it starts within those l chars */
1563                        if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1564                         && pmatch[0].rm_so <= l
1565                        ) {
1566                                l = pmatch[0].rm_so;
                                    /* empty match at the very start: treat one char
                                     * as consumed so that the loop advances */
1567                                if (pmatch[0].rm_eo == 0) {
1568                                        l++;
1569                                        pmatch[0].rm_eo++;
1570                                }
1571                                n++; /* we saw yet another delimiter */
1572                        } else {
                                    /* no usable match: the field runs to the next
                                     * NUL or \n; a \n found there is consumed too */
1573                                pmatch[0].rm_eo = l;
1574                                if (s[l])
1575                                        pmatch[0].rm_eo++;
1576                        }
1577                        memcpy(s1, s, l);
1578                        /* make sure we remove *all* of the separator chars */
1579                        do {
1580                                s1[l] = '\0';
1581                        } while (++l < pmatch[0].rm_eo);
1582                        nextword(&s1);
1583                        s += pmatch[0].rm_eo;
1584                } while (*s);
1585                return n;
1586        }
1587        if (c[0] == '\0') {  /* null split */
                    /* each char is written followed by its own NUL */
1588                while (*s) {
1589                        *s1++ = *s++;
1590                        *s1++ = '\0';
1591                        n++;
1592                }
1593                return n;
1594        }
1595        if (c[0] != ' ') {  /* single-character split */
                    /* works on the copy of s made above: every char of the
                     * set c is overwritten with NUL */
1596                if (icase) {
1597                        c[0] = toupper(c[0]);
1598                        c[1] = tolower(c[1]);
1599                }
1600                if (*s1) n++;
1601                while ((s1 = strpbrk(s1, c))) {
1602                        *s1++ = '\0';
1603                        n++;
1604                }
1605                return n;
1606        }
1607        /* space split */
            /* fields are copied one by one, overwriting the copy of s */
1608        while (*s) {
1609                s = skip_whitespace(s);
1610                if (!*s) break;
1611                n++;
1612                while (*s && !isspace(*s))
1613                        *s1++ = *s++;
1614                *s1++ = '\0';
1615        }
1616        return n;
1617}
1618
1619static void split_f0(void)
1620{
1621/* static char *fstrings; */
1622#define fstrings (G.split_f0__fstrings)
1623
1624        int i, n;
1625        char *s;
1626
1627        if (is_f0_split)
1628                return;
1629
1630        is_f0_split = TRUE;
1631        free(fstrings);
1632        fsrealloc(0);
1633        n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1634        fsrealloc(n);
1635        s = fstrings;
1636        for (i = 0; i < n; i++) {
1637                Fields[i].string = nextword(&s);
1638                Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1639        }
1640
1641        /* set NF manually to avoid side effects */
1642        clrvar(intvar[NF]);
1643        intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1644        intvar[NF]->number = nfields;
1645#undef fstrings
1646}
1647
1648/* perform additional actions when some internal variables changed */
1649static void handle_special(var *v)
1650{
1651        int n;
1652        char *b;
1653        const char *sep, *s;
1654        int sl, l, len, i, bsize;
1655
1656        if (!(v->type & VF_SPECIAL))
1657                return;
1658
1659        if (v == intvar[NF]) {
1660                n = (int)getvar_i(v);
1661                fsrealloc(n);
1662
1663                /* recalculate $0 */
1664                sep = getvar_s(intvar[OFS]);
1665                sl = strlen(sep);
1666                b = NULL;
1667                len = 0;
1668                for (i = 0; i < n; i++) {
1669                        s = getvar_s(&Fields[i]);
1670                        l = strlen(s);
1671                        if (b) {
1672                                memcpy(b+len, sep, sl);
1673                                len += sl;
1674                        }
1675                        qrealloc(&b, len+l+sl, &bsize);
1676                        memcpy(b+len, s, l);
1677                        len += l;
1678                }
1679                if (b)
1680                        b[len] = '\0';
1681                setvar_p(intvar[F0], b);
1682                is_f0_split = TRUE;
1683
1684        } else if (v == intvar[F0]) {
1685                is_f0_split = FALSE;
1686
1687        } else if (v == intvar[FS]) {
1688                mk_splitter(getvar_s(v), &fsplitter);
1689
1690        } else if (v == intvar[RS]) {
1691                mk_splitter(getvar_s(v), &rsplitter);
1692
1693        } else if (v == intvar[IGNORECASE]) {
1694                icase = istrue(v);
1695
1696        } else {                                /* $n */
1697                n = getvar_i(intvar[NF]);
1698                setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1699                /* right here v is invalid. Just to note... */
1700        }
1701}
1702
1703/* step through func/builtin/etc arguments */
1704static node *nextarg(node **pn)
1705{
1706        node *n;
1707
1708        n = *pn;
1709        if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1710                *pn = n->r.n;
1711                n = n->l.n;
1712        } else {
1713                *pn = NULL;
1714        }
1715        return n;
1716}
1717
1718static void hashwalk_init(var *v, xhash *array)
1719{
1720        char **w;
1721        hash_item *hi;
1722        unsigned i;
1723
1724        if (v->type & VF_WALK)
1725                free(v->x.walker);
1726
1727        v->type |= VF_WALK;
1728        w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1729        w[0] = w[1] = (char *)(w + 2);
1730        for (i = 0; i < array->csize; i++) {
1731                hi = array->items[i];
1732                while (hi) {
1733                        strcpy(*w, hi->name);
1734                        nextword(w);
1735                        hi = hi->next;
1736                }
1737        }
1738}
1739
1740static int hashwalk_next(var *v)
1741{
1742        char **w;
1743
1744        w = v->x.walker;
1745        if (w[1] == w[0])
1746                return FALSE;
1747
1748        setvar_s(v, nextword(w+1));
1749        return TRUE;
1750}
1751
1752/* evaluate node, return 1 when result is true, 0 otherwise */
1753static int ptest(node *pattern)
1754{
1755        /* ptest__v is "static": to save stack space? */
1756        return istrue(evaluate(pattern, &G.ptest__v));
1757}
1758
1759/* read next record from stream rsm into a variable v */
1760static int awk_getline(rstream *rsm, var *v)
1761{
1762        char *b;
1763        regmatch_t pmatch[2];
1764        int a, p, pp=0, size;
1765        int fd, so, eo, r, rp;
1766        char c, *m, *s;
1767
1768        /* we're using our own buffer since we need access to accumulating
1769         * characters
1770         */
1771        fd = fileno(rsm->F);
1772        m = rsm->buffer;
1773        a = rsm->adv;
1774        p = rsm->pos;
1775        size = rsm->size;
1776        c = (char) rsplitter.n.info;
1777        rp = 0;
1778
1779        if (!m) qrealloc(&m, 256, &size);
1780        do {
1781                b = m + a;
1782                so = eo = p;
1783                r = 1;
1784                if (p > 0) {
1785                        if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1786                                if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1787                                                        b, 1, pmatch, 0) == 0) {
1788                                        so = pmatch[0].rm_so;
1789                                        eo = pmatch[0].rm_eo;
1790                                        if (b[eo] != '\0')
1791                                                break;
1792                                }
1793                        } else if (c != '\0') {
1794                                s = strchr(b+pp, c);
1795                                if (!s) s = memchr(b+pp, '\0', p - pp);
1796                                if (s) {
1797                                        so = eo = s-b;
1798                                        eo++;
1799                                        break;
1800                                }
1801                        } else {
1802                                while (b[rp] == '\n')
1803                                        rp++;
1804                                s = strstr(b+rp, "\n\n");
1805                                if (s) {
1806                                        so = eo = s-b;
1807                                        while (b[eo] == '\n') eo++;
1808                                        if (b[eo] != '\0')
1809                                                break;
1810                                }
1811                        }
1812                }
1813
1814                if (a > 0) {
1815                        memmove(m, (const void *)(m+a), p+1);
1816                        b = m;
1817                        a = 0;
1818                }
1819
1820                qrealloc(&m, a+p+128, &size);
1821                b = m + a;
1822                pp = p;
1823                p += safe_read(fd, b+p, size-p-1);
1824                if (p < pp) {
1825                        p = 0;
1826                        r = 0;
1827                        setvar_i(intvar[ERRNO], errno);
1828                }
1829                b[p] = '\0';
1830
1831        } while (p > pp);
1832
1833        if (p == 0) {
1834                r--;
1835        } else {
1836                c = b[so]; b[so] = '\0';
1837                setvar_s(v, b+rp);
1838                v->type |= VF_USER;
1839                b[so] = c;
1840                c = b[eo]; b[eo] = '\0';
1841                setvar_s(intvar[RT], b+so);
1842                b[eo] = c;
1843        }
1844
1845        rsm->buffer = m;
1846        rsm->adv = a + eo;
1847        rsm->pos = p - eo;
1848        rsm->size = size;
1849
1850        return r;
1851}
1852
1853static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1854{
1855        int r = 0;
1856        char c;
1857        const char *s = format;
1858
1859        if (int_as_int && n == (int)n) {
1860                r = snprintf(b, size, "%d", (int)n);
1861        } else {
1862                do { c = *s; } while (c && *++s);
1863                if (strchr("diouxX", c)) {
1864                        r = snprintf(b, size, format, (int)n);
1865                } else if (strchr("eEfgG", c)) {
1866                        r = snprintf(b, size, format, n);
1867                } else {
1868                        syntax_error(EMSG_INV_FMT);
1869                }
1870        }
1871        return r;
1872}
1873
1874/* formatted output into an allocated buffer, return ptr to buffer */
1875static char *awk_printf(node *n)
1876{
1877        char *b = NULL;
1878        char *fmt, *s, *f;
1879        const char *s1;
1880        int i, j, incr, bsize;
1881        char c, c1;
1882        var *v, *arg;
1883
1884        v = nvalloc(1);
1885        fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1886
1887        i = 0;
1888        while (*f) {
1889                s = f;
1890                while (*f && (*f != '%' || *(++f) == '%'))
1891                        f++;
1892                while (*f && !isalpha(*f)) {
1893                        if (*f == '*')
1894                                syntax_error("%*x formats are not supported");
1895                        f++;
1896                }
1897
1898                incr = (f - s) + MAXVARFMT;
1899                qrealloc(&b, incr + i, &bsize);
1900                c = *f;
1901                if (c != '\0') f++;
1902                c1 = *f;
1903                *f = '\0';
1904                arg = evaluate(nextarg(&n), v);
1905
1906                j = i;
1907                if (c == 'c' || !c) {
1908                        i += sprintf(b+i, s, is_numeric(arg) ?
1909                                        (char)getvar_i(arg) : *getvar_s(arg));
1910                } else if (c == 's') {
1911                        s1 = getvar_s(arg);
1912                        qrealloc(&b, incr+i+strlen(s1), &bsize);
1913                        i += sprintf(b+i, s, s1);
1914                } else {
1915                        i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1916                }
1917                *f = c1;
1918
1919                /* if there was an error while sprintf, return value is negative */
1920                if (i < j) i = j;
1921        }
1922
1923        b = xrealloc(b, i + 1);
1924        free(fmt);
1925        nvfree(v);
1926        b[i] = '\0';
1927        return b;
1928}
1929
1930/* common substitution routine
1931 * replace (nm) substring of (src) that match (n) with (repl), store
1932 * result into (dest), return number of substitutions. If nm=0, replace
1933 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1934 * subexpression matching (\1-\9)
1935 */
1936static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1937{
1938        char *ds = NULL;
1939        const char *s;
1940        const char *sp;
1941        int c, i, j, di, rl, so, eo, nbs, n, dssize;
1942        regmatch_t pmatch[10];
1943        regex_t sreg, *re;
1944
1945        re = as_regex(rn, &sreg);
1946        if (!src) src = intvar[F0];
1947        if (!dest) dest = intvar[F0];
1948
1949        i = di = 0;
1950        sp = getvar_s(src);
1951        rl = strlen(repl);
1952        while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1953                so = pmatch[0].rm_so;
1954                eo = pmatch[0].rm_eo;
1955
1956                qrealloc(&ds, di + eo + rl, &dssize);
1957                memcpy(ds + di, sp, eo);
1958                di += eo;
1959                if (++i >= nm) {
1960                        /* replace */
1961                        di -= (eo - so);
1962                        nbs = 0;
1963                        for (s = repl; *s; s++) {
1964                                ds[di++] = c = *s;
1965                                if (c == '\\') {
1966                                        nbs++;
1967                                        continue;
1968                                }
1969                                if (c == '&' || (ex && c >= '0' && c <= '9')) {
1970                                        di -= ((nbs + 3) >> 1);
1971                                        j = 0;
1972                                        if (c != '&') {
1973                                                j = c - '0';
1974                                                nbs++;
1975                                        }
1976                                        if (nbs % 2) {
1977                                                ds[di++] = c;
1978                                        } else {
1979                                                n = pmatch[j].rm_eo - pmatch[j].rm_so;
1980                                                qrealloc(&ds, di + rl + n, &dssize);
1981                                                memcpy(ds + di, sp + pmatch[j].rm_so, n);
1982                                                di += n;
1983                                        }
1984                                }
1985                                nbs = 0;
1986                        }
1987                }
1988
1989                sp += eo;
1990                if (i == nm) break;
1991                if (eo == so) {
1992                        ds[di] = *sp++;
1993                        if (!ds[di++]) break;
1994                }
1995        }
1996
1997        qrealloc(&ds, di + strlen(sp), &dssize);
1998        strcpy(ds + di, sp);
1999        setvar_p(dest, ds);
2000        if (re == &sreg) regfree(re);
2001        return i;
2002}
2003
2004static var *exec_builtin(node *op, var *res)
2005{
2006#define tspl (G.exec_builtin__tspl)
2007
2008        int (*to_xxx)(int);
2009        var *tv;
2010        node *an[4];
2011        var *av[4];
2012        const char *as[4];
2013        regmatch_t pmatch[2];
2014        regex_t sreg, *re;
2015        node *spl;
2016        uint32_t isr, info;
2017        int nargs;
2018        time_t tt;
2019        char *s, *s1;
2020        int i, l, ll, n;
2021
2022        tv = nvalloc(4);
2023        isr = info = op->info;
2024        op = op->l.n;
2025
2026        av[2] = av[3] = NULL;
2027        for (i = 0; i < 4 && op; i++) {
2028                an[i] = nextarg(&op);
2029                if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
2030                if (isr & 0x08000000) as[i] = getvar_s(av[i]);
2031                isr >>= 1;
2032        }
2033
2034        nargs = i;
2035        if ((uint32_t)nargs < (info >> 30))
2036                syntax_error(EMSG_TOO_FEW_ARGS);
2037
2038        switch (info & OPNMASK) {
2039
2040        case B_a2:
2041#if ENABLE_FEATURE_AWK_LIBM
2042                setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2043#else
2044                syntax_error(EMSG_NO_MATH);
2045#endif
2046                break;
2047
2048        case B_sp:
2049                if (nargs > 2) {
2050                        spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2051                                an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2052                } else {
2053                        spl = &fsplitter.n;
2054                }
2055
2056                n = awk_split(as[0], spl, &s);
2057                s1 = s;
2058                clear_array(iamarray(av[1]));
2059                for (i = 1; i <= n; i++)
2060                        setari_u(av[1], i, nextword(&s1));
2061                free(s);
2062                setvar_i(res, n);
2063                break;
2064
2065        case B_ss:
2066                l = strlen(as[0]);
2067                i = getvar_i(av[1]) - 1;
2068                if (i > l) i = l;
2069                if (i < 0) i = 0;
2070                n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2071                if (n < 0) n = 0;
2072                s = xstrndup(as[0]+i, n);
2073                setvar_p(res, s);
2074                break;
2075
2076        /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2077         * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2078        case B_an:
2079                setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2080                break;
2081
2082        case B_co:
2083                setvar_i(res, ~getvar_i_int(av[0]));
2084                break;
2085
2086        case B_ls:
2087                setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2088                break;
2089
2090        case B_or:
2091                setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2092                break;
2093
2094        case B_rs:
2095                setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2096                break;
2097
2098        case B_xo:
2099                setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2100                break;
2101
2102        case B_lo:
2103                to_xxx = tolower;
2104                goto lo_cont;
2105
2106        case B_up:
2107                to_xxx = toupper;
2108 lo_cont:
2109                s1 = s = xstrdup(as[0]);
2110                while (*s1) {
2111                        *s1 = (*to_xxx)(*s1);
2112                        s1++;
2113                }
2114                setvar_p(res, s);
2115                break;
2116
2117        case B_ix:
2118                n = 0;
2119                ll = strlen(as[1]);
2120                l = strlen(as[0]) - ll;
2121                if (ll > 0 && l >= 0) {
2122                        if (!icase) {
2123                                s = strstr(as[0], as[1]);
2124                                if (s) n = (s - as[0]) + 1;
2125                        } else {
2126                                /* this piece of code is terribly slow and
2127                                 * really should be rewritten
2128                                 */
2129                                for (i=0; i<=l; i++) {
2130                                        if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2131                                                n = i+1;
2132                                                break;
2133                                        }
2134                                }
2135                        }
2136                }
2137                setvar_i(res, n);
2138                break;
2139
2140        case B_ti:
2141                if (nargs > 1)
2142                        tt = getvar_i(av[1]);
2143                else
2144                        time(&tt);
2145                //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2146                i = strftime(g_buf, MAXVARFMT,
2147                        ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2148                        localtime(&tt));
2149                g_buf[i] = '\0';
2150                setvar_s(res, g_buf);
2151                break;
2152
2153        case B_ma:
2154                re = as_regex(an[1], &sreg);
2155                n = regexec(re, as[0], 1, pmatch, 0);
2156                if (n == 0) {
2157                        pmatch[0].rm_so++;
2158                        pmatch[0].rm_eo++;
2159                } else {
2160                        pmatch[0].rm_so = 0;
2161                        pmatch[0].rm_eo = -1;
2162                }
2163                setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2164                setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2165                setvar_i(res, pmatch[0].rm_so);
2166                if (re == &sreg) regfree(re);
2167                break;
2168
2169        case B_ge:
2170                awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2171                break;
2172
2173        case B_gs:
2174                setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2175                break;
2176
2177        case B_su:
2178                setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2179                break;
2180        }
2181
2182        nvfree(tv);
2183        return res;
2184#undef tspl
2185}
2186
2187/*
2188 * Evaluate node - the heart of the program. Supplied with subtree
2189 * and place where to store result. returns ptr to result.
2190 */
2191#define XC(n) ((n) >> 8)
2192
2193static var *evaluate(node *op, var *res)
2194{
2195/* This procedure is recursive so we should count every byte */
2196#define fnargs (G.evaluate__fnargs)
2197/* seed is initialized to 1 */
2198#define seed   (G.evaluate__seed)
2199#define sreg   (G.evaluate__sreg)
2200
2201        node *op1;
2202        var *v1;
2203        union {
2204                var *v;
2205                const char *s;
2206                double d;
2207                int i;
2208        } L, R;
2209        uint32_t opinfo;
2210        int opn;
2211        union {
2212                char *s;
2213                rstream *rsm;
2214                FILE *F;
2215                var *v;
2216                regex_t *re;
2217                uint32_t info;
2218        } X;
2219
2220        if (!op)
2221                return setvar_s(res, NULL);
2222
2223        v1 = nvalloc(2);
2224
2225        while (op) {
2226                opinfo = op->info;
2227                opn = (opinfo & OPNMASK);
2228                g_lineno = op->lineno;
2229
2230                /* execute inevitable things */
2231                op1 = op->l.n;
2232                if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2233                if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2234                if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2235                if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2236                if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2237
2238                switch (XC(opinfo & OPCLSMASK)) {
2239
2240                /* -- iterative node type -- */
2241
2242                /* test pattern */
2243                case XC( OC_TEST ):
2244                        if ((op1->info & OPCLSMASK) == OC_COMMA) {
2245                                /* it's range pattern */
2246                                if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2247                                        op->info |= OF_CHECKED;
2248                                        if (ptest(op1->r.n))
2249                                                op->info &= ~OF_CHECKED;
2250
2251                                        op = op->a.n;
2252                                } else {
2253                                        op = op->r.n;
2254                                }
2255                        } else {
2256                                op = (ptest(op1)) ? op->a.n : op->r.n;
2257                        }
2258                        break;
2259
2260                /* just evaluate an expression, also used as unconditional jump */
2261                case XC( OC_EXEC ):
2262                        break;
2263
2264                /* branch, used in if-else and various loops */
2265                case XC( OC_BR ):
2266                        op = istrue(L.v) ? op->a.n : op->r.n;
2267                        break;
2268
2269                /* initialize for-in loop */
2270                case XC( OC_WALKINIT ):
2271                        hashwalk_init(L.v, iamarray(R.v));
2272                        break;
2273
2274                /* get next array item */
2275                case XC( OC_WALKNEXT ):
2276                        op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2277                        break;
2278
2279                case XC( OC_PRINT ):
2280                case XC( OC_PRINTF ):
2281                        X.F = stdout;
2282                        if (op->r.n) {
2283                                X.rsm = newfile(R.s);
2284                                if (!X.rsm->F) {
2285                                        if (opn == '|') {
2286                                                X.rsm->F = popen(R.s, "w");
2287                                                if (X.rsm->F == NULL)
2288                                                        bb_perror_msg_and_die("popen");
2289                                                X.rsm->is_pipe = 1;
2290                                        } else {
2291                                                X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2292                                        }
2293                                }
2294                                X.F = X.rsm->F;
2295                        }
2296
2297                        if ((opinfo & OPCLSMASK) == OC_PRINT) {
2298                                if (!op1) {
2299                                        fputs(getvar_s(intvar[F0]), X.F);
2300                                } else {
2301                                        while (op1) {
2302                                                L.v = evaluate(nextarg(&op1), v1);
2303                                                if (L.v->type & VF_NUMBER) {
2304                                                        fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2305                                                                        getvar_i(L.v), TRUE);
2306                                                        fputs(g_buf, X.F);
2307                                                } else {
2308                                                        fputs(getvar_s(L.v), X.F);
2309                                                }
2310
2311                                                if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2312                                        }
2313                                }
2314                                fputs(getvar_s(intvar[ORS]), X.F);
2315
2316                        } else {        /* OC_PRINTF */
2317                                L.s = awk_printf(op1);
2318                                fputs(L.s, X.F);
2319                                free((char*)L.s);
2320                        }
2321                        fflush(X.F);
2322                        break;
2323
2324                case XC( OC_DELETE ):
2325                        X.info = op1->info & OPCLSMASK;
2326                        if (X.info == OC_VAR) {
2327                                R.v = op1->l.v;
2328                        } else if (X.info == OC_FNARG) {
2329                                R.v = &fnargs[op1->l.i];
2330                        } else {
2331                                syntax_error(EMSG_NOT_ARRAY);
2332                        }
2333
2334                        if (op1->r.n) {
2335                                clrvar(L.v);
2336                                L.s = getvar_s(evaluate(op1->r.n, v1));
2337                                hash_remove(iamarray(R.v), L.s);
2338                        } else {
2339                                clear_array(iamarray(R.v));
2340                        }
2341                        break;
2342
2343                case XC( OC_NEWSOURCE ):
2344                        g_progname = op->l.s;
2345                        break;
2346
2347                case XC( OC_RETURN ):
2348                        copyvar(res, L.v);
2349                        break;
2350
2351                case XC( OC_NEXTFILE ):
2352                        nextfile = TRUE;
2353                case XC( OC_NEXT ):
2354                        nextrec = TRUE;
2355                case XC( OC_DONE ):
2356                        clrvar(res);
2357                        break;
2358
2359                case XC( OC_EXIT ):
2360                        awk_exit(L.d);
2361
2362                /* -- recursive node type -- */
2363
2364                case XC( OC_VAR ):
2365                        L.v = op->l.v;
2366                        if (L.v == intvar[NF])
2367                                split_f0();
2368                        goto v_cont;
2369
2370                case XC( OC_FNARG ):
2371                        L.v = &fnargs[op->l.i];
2372 v_cont:
2373                        res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2374                        break;
2375
2376                case XC( OC_IN ):
2377                        setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2378                        break;
2379
2380                case XC( OC_REGEXP ):
2381                        op1 = op;
2382                        L.s = getvar_s(intvar[F0]);
2383                        goto re_cont;
2384
2385                case XC( OC_MATCH ):
2386                        op1 = op->r.n;
2387 re_cont:
2388                        X.re = as_regex(op1, &sreg);
2389                        R.i = regexec(X.re, L.s, 0, NULL, 0);
2390                        if (X.re == &sreg) regfree(X.re);
2391                        setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2392                        break;
2393
2394                case XC( OC_MOVE ):
2395                        /* if source is a temporary string, just relink it to dest */
2396                        if (R.v == v1+1 && R.v->string) {
2397                                res = setvar_p(L.v, R.v->string);
2398                                R.v->string = NULL;
2399                        } else {
2400                                res = copyvar(L.v, R.v);
2401                        }
2402                        break;
2403
2404                case XC( OC_TERNARY ):
2405                        if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2406                                syntax_error(EMSG_POSSIBLE_ERROR);
2407                        res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2408                        break;
2409
2410                case XC( OC_FUNC ):
2411                        if (!op->r.f->body.first)
2412                                syntax_error(EMSG_UNDEF_FUNC);
2413
2414                        X.v = R.v = nvalloc(op->r.f->nargs+1);
2415                        while (op1) {
2416                                L.v = evaluate(nextarg(&op1), v1);
2417                                copyvar(R.v, L.v);
2418                                R.v->type |= VF_CHILD;
2419                                R.v->x.parent = L.v;
2420                                if (++R.v - X.v >= op->r.f->nargs)
2421                                        break;
2422                        }
2423
2424                        R.v = fnargs;
2425                        fnargs = X.v;
2426
2427                        L.s = g_progname;
2428                        res = evaluate(op->r.f->body.first, res);
2429                        g_progname = L.s;
2430
2431                        nvfree(fnargs);
2432                        fnargs = R.v;
2433                        break;
2434
2435                case XC( OC_GETLINE ):
2436                case XC( OC_PGETLINE ):
2437                        if (op1) {
2438                                X.rsm = newfile(L.s);
2439                                if (!X.rsm->F) {
2440                                        if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2441                                                X.rsm->F = popen(L.s, "r");
2442                                                X.rsm->is_pipe = TRUE;
2443                                        } else {
2444                                                X.rsm->F = fopen_for_read(L.s);         /* not xfopen! */
2445                                        }
2446                                }
2447                        } else {
2448                                if (!iF) iF = next_input_file();
2449                                X.rsm = iF;
2450                        }
2451
2452                        if (!X.rsm->F) {
2453                                setvar_i(intvar[ERRNO], errno);
2454                                setvar_i(res, -1);
2455                                break;
2456                        }
2457
2458                        if (!op->r.n)
2459                                R.v = intvar[F0];
2460
2461                        L.i = awk_getline(X.rsm, R.v);
2462                        if (L.i > 0) {
2463                                if (!op1) {
2464                                        incvar(intvar[FNR]);
2465                                        incvar(intvar[NR]);
2466                                }
2467                        }
2468                        setvar_i(res, L.i);
2469                        break;
2470
2471                /* simple builtins */
2472                case XC( OC_FBLTIN ):
2473                        switch (opn) {
2474
2475                        case F_in:
2476                                R.d = (int)L.d;
2477                                break;
2478
2479                        case F_rn:
2480                                R.d = (double)rand() / (double)RAND_MAX;
2481                                break;
2482#if ENABLE_FEATURE_AWK_LIBM
2483                        case F_co:
2484                                R.d = cos(L.d);
2485                                break;
2486
2487                        case F_ex:
2488                                R.d = exp(L.d);
2489                                break;
2490
2491                        case F_lg:
2492                                R.d = log(L.d);
2493                                break;
2494
2495                        case F_si:
2496                                R.d = sin(L.d);
2497                                break;
2498
2499                        case F_sq:
2500                                R.d = sqrt(L.d);
2501                                break;
2502#else
2503                        case F_co:
2504                        case F_ex:
2505                        case F_lg:
2506                        case F_si:
2507                        case F_sq:
2508                                syntax_error(EMSG_NO_MATH);
2509                                break;
2510#endif
2511                        case F_sr:
2512                                R.d = (double)seed;
2513                                seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2514                                srand(seed);
2515                                break;
2516
2517                        case F_ti:
2518                                R.d = time(NULL);
2519                                break;
2520
2521                        case F_le:
2522                                if (!op1)
2523                                        L.s = getvar_s(intvar[F0]);
2524                                R.d = strlen(L.s);
2525                                break;
2526
2527                        case F_sy:
2528                                fflush(NULL);
2529                                R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2530                                                ? (system(L.s) >> 8) : 0;
2531                                break;
2532
2533                        case F_ff:
2534                                if (!op1)
2535                                        fflush(stdout);
2536                                else {
2537                                        if (L.s && *L.s) {
2538                                                X.rsm = newfile(L.s);
2539                                                fflush(X.rsm->F);
2540                                        } else {
2541                                                fflush(NULL);
2542                                        }
2543                                }
2544                                break;
2545
2546                        case F_cl:
2547                                X.rsm = (rstream *)hash_search(fdhash, L.s);
2548                                if (X.rsm) {
2549                                        R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2550                                        free(X.rsm->buffer);
2551                                        hash_remove(fdhash, L.s);
2552                                }
2553                                if (R.i != 0)
2554                                        setvar_i(intvar[ERRNO], errno);
2555                                R.d = (double)R.i;
2556                                break;
2557                        }
2558                        setvar_i(res, R.d);
2559                        break;
2560
2561                case XC( OC_BUILTIN ):
2562                        res = exec_builtin(op, res);
2563                        break;
2564
2565                case XC( OC_SPRINTF ):
2566                        setvar_p(res, awk_printf(op1));
2567                        break;
2568
2569                case XC( OC_UNARY ):
2570                        X.v = R.v;
2571                        L.d = R.d = getvar_i(R.v);
2572                        switch (opn) {
2573                        case 'P':
2574                                L.d = ++R.d;
2575                                goto r_op_change;
2576                        case 'p':
2577                                R.d++;
2578                                goto r_op_change;
2579                        case 'M':
2580                                L.d = --R.d;
2581                                goto r_op_change;
2582                        case 'm':
2583                                R.d--;
2584                                goto r_op_change;
2585                        case '!':
2586                                L.d = istrue(X.v) ? 0 : 1;
2587                                break;
2588                        case '-':
2589                                L.d = -R.d;
2590                                break;
2591 r_op_change:
2592                                setvar_i(X.v, R.d);
2593                        }
2594                        setvar_i(res, L.d);
2595                        break;
2596
2597                case XC( OC_FIELD ):
2598                        R.i = (int)getvar_i(R.v);
2599                        if (R.i == 0) {
2600                                res = intvar[F0];
2601                        } else {
2602                                split_f0();
2603                                if (R.i > nfields)
2604                                        fsrealloc(R.i);
2605                                res = &Fields[R.i - 1];
2606                        }
2607                        break;
2608
2609                /* concatenation (" ") and index joining (",") */
2610                case XC( OC_CONCAT ):
2611                case XC( OC_COMMA ):
2612                        opn = strlen(L.s) + strlen(R.s) + 2;
2613                        X.s = xmalloc(opn);
2614                        strcpy(X.s, L.s);
2615                        if ((opinfo & OPCLSMASK) == OC_COMMA) {
2616                                L.s = getvar_s(intvar[SUBSEP]);
2617                                X.s = xrealloc(X.s, opn + strlen(L.s));
2618                                strcat(X.s, L.s);
2619                        }
2620                        strcat(X.s, R.s);
2621                        setvar_p(res, X.s);
2622                        break;
2623
2624                case XC( OC_LAND ):
2625                        setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2626                        break;
2627
2628                case XC( OC_LOR ):
2629                        setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2630                        break;
2631
2632                case XC( OC_BINARY ):
2633                case XC( OC_REPLACE ):
2634                        R.d = getvar_i(R.v);
2635                        switch (opn) {
2636                        case '+':
2637                                L.d += R.d;
2638                                break;
2639                        case '-':
2640                                L.d -= R.d;
2641                                break;
2642                        case '*':
2643                                L.d *= R.d;
2644                                break;
2645                        case '/':
2646                                if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2647                                L.d /= R.d;
2648                                break;
2649                        case '&':
2650#if ENABLE_FEATURE_AWK_LIBM
2651                                L.d = pow(L.d, R.d);
2652#else
2653                                syntax_error(EMSG_NO_MATH);
2654#endif
2655                                break;
2656                        case '%':
2657                                if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2658                                L.d -= (int)(L.d / R.d) * R.d;
2659                                break;
2660                        }
2661                        res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2662                        break;
2663
2664                case XC( OC_COMPARE ):
2665                        if (is_numeric(L.v) && is_numeric(R.v)) {
2666                                L.d = getvar_i(L.v) - getvar_i(R.v);
2667                        } else {
2668                                L.s = getvar_s(L.v);
2669                                R.s = getvar_s(R.v);
2670                                L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2671                        }
2672                        switch (opn & 0xfe) {
2673                        case 0:
2674                                R.i = (L.d > 0);
2675                                break;
2676                        case 2:
2677                                R.i = (L.d >= 0);
2678                                break;
2679                        case 4:
2680                                R.i = (L.d == 0);
2681                                break;
2682                        }
2683                        setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2684                        break;
2685
2686                default:
2687                        syntax_error(EMSG_POSSIBLE_ERROR);
2688                }
2689                if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2690                        op = op->a.n;
2691                if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2692                        break;
2693                if (nextrec)
2694                        break;
2695        }
2696        nvfree(v1);
2697        return res;
2698#undef fnargs
2699#undef seed
2700#undef sreg
2701}
2702
2703
2704/* -------- main & co. -------- */
2705
2706static int awk_exit(int r)
2707{
2708        var tv;
2709        unsigned i;
2710        hash_item *hi;
2711
2712        zero_out_var(&tv);
2713
2714        if (!exiting) {
2715                exiting = TRUE;
2716                nextrec = FALSE;
2717                evaluate(endseq.first, &tv);
2718        }
2719
2720        /* waiting for children */
2721        for (i = 0; i < fdhash->csize; i++) {
2722                hi = fdhash->items[i];
2723                while (hi) {
2724                        if (hi->data.rs.F && hi->data.rs.is_pipe)
2725                                pclose(hi->data.rs.F);
2726                        hi = hi->next;
2727                }
2728        }
2729
2730        exit(r);
2731}
2732
2733/* if expr looks like "var=value", perform assignment and return 1,
2734 * otherwise return 0 */
2735static int is_assignment(const char *expr)
2736{
2737        char *exprc, *s, *s0, *s1;
2738
2739        exprc = xstrdup(expr);
2740        if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2741                free(exprc);
2742                return FALSE;
2743        }
2744
2745        *(s++) = '\0';
2746        s0 = s1 = s;
2747        while (*s)
2748                *(s1++) = nextchar(&s);
2749
2750        *s1 = '\0';
2751        setvar_u(newvar(exprc), s0);
2752        free(exprc);
2753        return TRUE;
2754}
2755
2756/* switch to next input file */
2757static rstream *next_input_file(void)
2758{
2759#define rsm          (G.next_input_file__rsm)
2760#define files_happen (G.next_input_file__files_happen)
2761
2762        FILE *F = NULL;
2763        const char *fname, *ind;
2764
2765        if (rsm.F) fclose(rsm.F);
2766        rsm.F = NULL;
2767        rsm.pos = rsm.adv = 0;
2768
2769        do {
2770                if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2771                        if (files_happen)
2772                                return NULL;
2773                        fname = "-";
2774                        F = stdin;
2775                } else {
2776                        ind = getvar_s(incvar(intvar[ARGIND]));
2777                        fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2778                        if (fname && *fname && !is_assignment(fname))
2779                                F = xfopen_stdin(fname);
2780                }
2781        } while (!F);
2782
2783        files_happen = TRUE;
2784        setvar_s(intvar[FILENAME], fname);
2785        rsm.F = F;
2786        return &rsm;
2787#undef rsm
2788#undef files_happen
2789}
2790
2791int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2792int awk_main(int argc, char **argv)
2793{
2794        unsigned opt;
2795        char *opt_F, *opt_W;
2796        llist_t *list_v = NULL;
2797        llist_t *list_f = NULL;
2798        int i, j;
2799        var *v;
2800        var tv;
2801        char **envp;
2802        char *vnames = (char *)vNames; /* cheat */
2803        char *vvalues = (char *)vValues;
2804
2805        INIT_G();
2806
2807        /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2808         * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2809        if (ENABLE_LOCALE_SUPPORT)
2810                setlocale(LC_NUMERIC, "C");
2811
2812        zero_out_var(&tv);
2813
2814        /* allocate global buffer */
2815        g_buf = xmalloc(MAXVARFMT + 1);
2816
2817        vhash = hash_init();
2818        ahash = hash_init();
2819        fdhash = hash_init();
2820        fnhash = hash_init();
2821
2822        /* initialize variables */
2823        for (i = 0; *vnames; i++) {
2824                intvar[i] = v = newvar(nextword(&vnames));
2825                if (*vvalues != '\377')
2826                        setvar_s(v, nextword(&vvalues));
2827                else
2828                        setvar_i(v, 0);
2829
2830                if (*vnames == '*') {
2831                        v->type |= VF_SPECIAL;
2832                        vnames++;
2833                }
2834        }
2835
2836        handle_special(intvar[FS]);
2837        handle_special(intvar[RS]);
2838
2839        newfile("/dev/stdin")->F = stdin;
2840        newfile("/dev/stdout")->F = stdout;
2841        newfile("/dev/stderr")->F = stderr;
2842
2843        /* Huh, people report that sometimes environ is NULL. Oh well. */
2844        if (environ) for (envp = environ; *envp; envp++) {
2845                /* environ is writable, thus we don't strdup it needlessly */
2846                char *s = *envp;
2847                char *s1 = strchr(s, '=');
2848                if (s1) {
2849                        *s1 = '\0';
2850                        /* Both findvar and setvar_u take const char*
2851                         * as 2nd arg -> environment is not trashed */
2852                        setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2853                        *s1 = '=';
2854                }
2855        }
2856        opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2857        opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2858        argv += optind;
2859        argc -= optind;
2860        if (opt & 0x1)
2861                setvar_s(intvar[FS], opt_F); // -F
2862        while (list_v) { /* -v */
2863                if (!is_assignment(llist_pop(&list_v)))
2864                        bb_show_usage();
2865        }
2866        if (list_f) { /* -f */
2867                do {
2868                        char *s = NULL;
2869                        FILE *from_file;
2870
2871                        g_progname = llist_pop(&list_f);
2872                        from_file = xfopen_stdin(g_progname);
2873                        /* one byte is reserved for some trick in next_token */
2874                        for (i = j = 1; j > 0; i += j) {
2875                                s = xrealloc(s, i + 4096);
2876                                j = fread(s + i, 1, 4094, from_file);
2877                        }
2878                        s[i] = '\0';
2879                        fclose(from_file);
2880                        parse_program(s + 1);
2881                        free(s);
2882                } while (list_f);
2883                argc++;
2884        } else { // no -f: take program from 1st parameter
2885                if (!argc)
2886                        bb_show_usage();
2887                g_progname = "cmd. line";
2888                parse_program(*argv++);
2889        }
2890        if (opt & 0x8) // -W
2891                bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2892
2893        /* fill in ARGV array */
2894        setvar_i(intvar[ARGC], argc);
2895        setari_u(intvar[ARGV], 0, "awk");
2896        i = 0;
2897        while (*argv)
2898                setari_u(intvar[ARGV], ++i, *argv++);
2899
2900        evaluate(beginseq.first, &tv);
2901        if (!mainseq.first && !endseq.first)
2902                awk_exit(EXIT_SUCCESS);
2903
2904        /* input file could already be opened in BEGIN block */
2905        if (!iF) iF = next_input_file();
2906
2907        /* passing through input files */
2908        while (iF) {
2909                nextfile = FALSE;
2910                setvar_i(intvar[FNR], 0);
2911
2912                while ((i = awk_getline(iF, intvar[F0])) > 0) {
2913                        nextrec = FALSE;
2914                        incvar(intvar[NR]);
2915                        incvar(intvar[FNR]);
2916                        evaluate(mainseq.first, &tv);
2917
2918                        if (nextfile)
2919                                break;
2920                }
2921
2922                if (i < 0)
2923                        syntax_error(strerror(errno));
2924
2925                iF = next_input_file();
2926        }
2927
2928        awk_exit(EXIT_SUCCESS);
2929        /*return 0;*/
2930}
2931