busybox/editors/awk.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * awk implementation for busybox
   4 *
   5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
   6 *
   7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
   8 */
   9
  10//usage:#define awk_trivial_usage
  11//usage:       "[OPTIONS] [AWK_PROGRAM] [FILE]..."
  12//usage:#define awk_full_usage "\n\n"
  13//usage:       "        -v VAR=VAL      Set variable"
  14//usage:     "\n        -F SEP          Use SEP as field separator"
  15//usage:     "\n        -f FILE         Read program from FILE"
  16
  17#include "libbb.h"
  18#include "xregex.h"
  19#include <math.h>
  20
  21/* This is a NOEXEC applet. Be very careful! */
  22
  23
  24/* If you comment out one of these below, it will be #defined later
  25 * to perform debug printfs to stderr: */
  26#define debug_printf_walker(...)  do {} while (0)
  27#define debug_printf_eval(...)  do {} while (0)
  28
  29#ifndef debug_printf_walker
  30# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
  31#endif
  32#ifndef debug_printf_eval
  33# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
  34#endif
  35
  36
  37
  38#define MAXVARFMT       240
  39#define MINNVBLOCK      64
  40
  41/* variable flags */
  42#define VF_NUMBER       0x0001  /* 1 = primary type is number */
  43#define VF_ARRAY        0x0002  /* 1 = it's an array */
  44
  45#define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
  46#define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
  47#define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
  48#define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
  49#define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
  50#define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
  51#define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
  52
  53/* these flags are static, don't change them when value is changed */
  54#define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
  55
  56typedef struct walker_list {
  57        char *end;
  58        char *cur;
  59        struct walker_list *prev;
  60        char wbuf[1];
  61} walker_list;
  62
  63/* Variable */
  64typedef struct var_s {
  65        unsigned type;            /* flags */
  66        double number;
  67        char *string;
  68        union {
  69                int aidx;               /* func arg idx (for compilation stage) */
  70                struct xhash_s *array;  /* array ptr */
  71                struct var_s *parent;   /* for func args, ptr to actual parameter */
  72                walker_list *walker;    /* list of array elements (for..in) */
  73        } x;
  74} var;
  75
  76/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
  77typedef struct chain_s {
  78        struct node_s *first;
  79        struct node_s *last;
  80        const char *programname;
  81} chain;
  82
  83/* Function */
  84typedef struct func_s {
  85        unsigned nargs;
  86        struct chain_s body;
  87} func;
  88
  89/* I/O stream */
  90typedef struct rstream_s {
  91        FILE *F;
  92        char *buffer;
  93        int adv;
  94        int size;
  95        int pos;
  96        smallint is_pipe;
  97} rstream;
  98
  99typedef struct hash_item_s {
 100        union {
 101                struct var_s v;         /* variable/array hash */
 102                struct rstream_s rs;    /* redirect streams hash */
 103                struct func_s f;        /* functions hash */
 104        } data;
 105        struct hash_item_s *next;       /* next in chain */
 106        char name[1];                   /* really it's longer */
 107} hash_item;
 108
 109typedef struct xhash_s {
 110        unsigned nel;           /* num of elements */
 111        unsigned csize;         /* current hash size */
 112        unsigned nprime;        /* next hash size in PRIMES[] */
 113        unsigned glen;          /* summary length of item names */
 114        struct hash_item_s **items;
 115} xhash;
 116
 117/* Tree node */
 118typedef struct node_s {
 119        uint32_t info;
 120        unsigned lineno;
 121        union {
 122                struct node_s *n;
 123                var *v;
 124                int aidx;
 125                char *new_progname;
 126                regex_t *re;
 127        } l;
 128        union {
 129                struct node_s *n;
 130                regex_t *ire;
 131                func *f;
 132        } r;
 133        union {
 134                struct node_s *n;
 135        } a;
 136} node;
 137
 138/* Block of temporary variables */
 139typedef struct nvblock_s {
 140        int size;
 141        var *pos;
 142        struct nvblock_s *prev;
 143        struct nvblock_s *next;
 144        var nv[];
 145} nvblock;
 146
 147typedef struct tsplitter_s {
 148        node n;
 149        regex_t re[2];
 150} tsplitter;
 151
 152/* simple token classes */
 153/* Order and hex values are very important!!!  See next_token() */
 154#define TC_SEQSTART      1                              /* ( */
 155#define TC_SEQTERM      (1 << 1)                /* ) */
 156#define TC_REGEXP       (1 << 2)                /* /.../ */
 157#define TC_OUTRDR       (1 << 3)                /* | > >> */
 158#define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
 159#define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
 160#define TC_BINOPX       (1 << 6)                /* two-opnd operator */
 161#define TC_IN           (1 << 7)
 162#define TC_COMMA        (1 << 8)
 163#define TC_PIPE         (1 << 9)                /* input redirection pipe */
 164#define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
 165#define TC_ARRTERM      (1 << 11)               /* ] */
 166#define TC_GRPSTART     (1 << 12)               /* { */
 167#define TC_GRPTERM      (1 << 13)               /* } */
 168#define TC_SEMICOL      (1 << 14)
 169#define TC_NEWLINE      (1 << 15)
 170#define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
 171#define TC_WHILE        (1 << 17)
 172#define TC_ELSE         (1 << 18)
 173#define TC_BUILTIN      (1 << 19)
 174#define TC_GETLINE      (1 << 20)
 175#define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
 176#define TC_BEGIN        (1 << 22)
 177#define TC_END          (1 << 23)
 178#define TC_EOF          (1 << 24)
 179#define TC_VARIABLE     (1 << 25)
 180#define TC_ARRAY        (1 << 26)
 181#define TC_FUNCTION     (1 << 27)
 182#define TC_STRING       (1 << 28)
 183#define TC_NUMBER       (1 << 29)
 184
 185#define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
 186
 187/* combined token classes */
 188#define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
 189#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
 190#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
 191                   | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
 192
 193#define TC_STATEMNT (TC_STATX | TC_WHILE)
 194#define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
 195
 196/* word tokens, cannot mean something else if not expected */
 197#define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
 198                   | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
 199
 200/* discard newlines after these */
 201#define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
 202                   | TC_BINOP | TC_OPTERM)
 203
 204/* what can expression begin with */
 205#define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
 206/* what can group begin with */
 207#define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
 208
 209/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
 210/* operator is inserted between them */
 211#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
 212                   | TC_STRING | TC_NUMBER | TC_UOPPOST)
 213#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
 214
 215#define OF_RES1    0x010000
 216#define OF_RES2    0x020000
 217#define OF_STR1    0x040000
 218#define OF_STR2    0x080000
 219#define OF_NUM1    0x100000
 220#define OF_CHECKED 0x200000
 221
 222/* combined operator flags */
 223#define xx      0
 224#define xV      OF_RES2
 225#define xS      (OF_RES2 | OF_STR2)
 226#define Vx      OF_RES1
 227#define VV      (OF_RES1 | OF_RES2)
 228#define Nx      (OF_RES1 | OF_NUM1)
 229#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
 230#define Sx      (OF_RES1 | OF_STR1)
 231#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
 232#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
 233
 234#define OPCLSMASK 0xFF00
 235#define OPNMASK   0x007F
 236
 237/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 238 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 239 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 240 */
 241#define P(x)      (x << 24)
 242#define PRIMASK   0x7F000000
 243#define PRIMASK2  0x7E000000
 244
 245/* Operation classes */
 246
 247#define SHIFT_TIL_THIS  0x0600
 248#define RECUR_FROM_THIS 0x1000
 249
 250enum {
 251        OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
 252        OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
 253
 254        OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
 255        OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
 256        OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
 257
 258        OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
 259        OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
 260        OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
 261        OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
 262        OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
 263        OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
 264        OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
 265        OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
 266        OC_DONE = 0x2800,
 267
 268        ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
 269        ST_WHILE = 0x3300
 270};
 271
 272/* simple builtins */
 273enum {
 274        F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
 275        F_ti,   F_le,   F_sy,   F_ff,   F_cl
 276};
 277
 278/* builtins */
 279enum {
 280        B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
 281        B_ge,   B_gs,   B_su,
 282        B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
 283};
 284
 285/* tokens and their corresponding info values */
 286
 287#define NTC     "\377"  /* switch to next token class (tc<<1) */
 288#define NTCC    '\377'
 289
 290#define OC_B  OC_BUILTIN
 291
 292static const char tokenlist[] ALIGN1 =
 293        "\1("         NTC
 294        "\1)"         NTC
 295        "\1/"         NTC                                   /* REGEXP */
 296        "\2>>"        "\1>"         "\1|"       NTC         /* OUTRDR */
 297        "\2++"        "\2--"        NTC                     /* UOPPOST */
 298        "\2++"        "\2--"        "\1$"       NTC         /* UOPPRE1 */
 299        "\2=="        "\1="         "\2+="      "\2-="      /* BINOPX */
 300        "\2*="        "\2/="        "\2%="      "\2^="
 301        "\1+"         "\1-"         "\3**="     "\2**"
 302        "\1/"         "\1%"         "\1^"       "\1*"
 303        "\2!="        "\2>="        "\2<="      "\1>"
 304        "\1<"         "\2!~"        "\1~"       "\2&&"
 305        "\2||"        "\1?"         "\1:"       NTC
 306        "\2in"        NTC
 307        "\1,"         NTC
 308        "\1|"         NTC
 309        "\1+"         "\1-"         "\1!"       NTC         /* UOPPRE2 */
 310        "\1]"         NTC
 311        "\1{"         NTC
 312        "\1}"         NTC
 313        "\1;"         NTC
 314        "\1\n"        NTC
 315        "\2if"        "\2do"        "\3for"     "\5break"   /* STATX */
 316        "\10continue" "\6delete"    "\5print"
 317        "\6printf"    "\4next"      "\10nextfile"
 318        "\6return"    "\4exit"      NTC
 319        "\5while"     NTC
 320        "\4else"      NTC
 321
 322        "\3and"       "\5compl"     "\6lshift"  "\2or"
 323        "\6rshift"    "\3xor"
 324        "\5close"     "\6system"    "\6fflush"  "\5atan2"   /* BUILTIN */
 325        "\3cos"       "\3exp"       "\3int"     "\3log"
 326        "\4rand"      "\3sin"       "\4sqrt"    "\5srand"
 327        "\6gensub"    "\4gsub"      "\5index"   "\6length"
 328        "\5match"     "\5split"     "\7sprintf" "\3sub"
 329        "\6substr"    "\7systime"   "\10strftime" "\6mktime"
 330        "\7tolower"   "\7toupper"   NTC
 331        "\7getline"   NTC
 332        "\4func"      "\10function" NTC
 333        "\5BEGIN"     NTC
 334        "\3END"
 335        /* compiler adds trailing "\0" */
 336        ;
 337
 338static const uint32_t tokeninfo[] = {
 339        0,
 340        0,
 341        OC_REGEXP,
 342        xS|'a',                  xS|'w',                  xS|'|',
 343        OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
 344        OC_UNARY|xV|P(9)|'P',    OC_UNARY|xV|P(9)|'M',    OC_FIELD|xV|P(5),
 345        OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
 346        OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
 347        OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
 348        OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
 349        OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
 350        OC_COMPARE|VV|P(39)|2,   OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
 351        OC_LOR|Vx|P(59),         OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
 352        OC_IN|SV|P(49), /* in */
 353        OC_COMMA|SS|P(80),
 354        OC_PGETLINE|SV|P(37),
 355        OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
 356        0, /* ] */
 357        0,
 358        0,
 359        0,
 360        0, /* \n */
 361        ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
 362        OC_CONTINUE,  OC_DELETE|Vx, OC_PRINT,
 363        OC_PRINTF,    OC_NEXT,      OC_NEXTFILE,
 364        OC_RETURN|Vx, OC_EXIT|Nx,
 365        ST_WHILE,
 366        0, /* else */
 367
 368        OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
 369        OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
 370        OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
 371        OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
 372        OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
 373        OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
 374        OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
 375        OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
 376        OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
 377        OC_GETLINE|SV|P(0),
 378        0,                 0,
 379        0,
 380        0 /* END */
 381};
 382
 383/* internal variable names and their initial values       */
 384/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
 385enum {
 386        CONVFMT,    OFMT,       FS,         OFS,
 387        ORS,        RS,         RT,         FILENAME,
 388        SUBSEP,     F0,         ARGIND,     ARGC,
 389        ARGV,       ERRNO,      FNR,        NR,
 390        NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
 391};
 392
 393static const char vNames[] ALIGN1 =
 394        "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
 395        "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
 396        "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
 397        "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
 398        "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
 399
 400static const char vValues[] ALIGN1 =
 401        "%.6g\0"    "%.6g\0"    " \0"       " \0"
 402        "\n\0"      "\n\0"      "\0"        "\0"
 403        "\034\0"    "\0"        "\377";
 404
 405/* hash size may grow to these values */
 406#define FIRST_PRIME 61
 407static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
 408
 409
 410/* Globals. Split in two parts so that first one is addressed
 411 * with (mostly short) negative offsets.
 412 * NB: it's unsafe to put members of type "double"
 413 * into globals2 (gcc may fail to align them).
 414 */
 415struct globals {
 416        double t_double;
 417        chain beginseq, mainseq, endseq;
 418        chain *seq;
 419        node *break_ptr, *continue_ptr;
 420        rstream *iF;
 421        xhash *vhash, *ahash, *fdhash, *fnhash;
 422        const char *g_progname;
 423        int g_lineno;
 424        int nfields;
 425        int maxfields; /* used in fsrealloc() only */
 426        var *Fields;
 427        nvblock *g_cb;
 428        char *g_pos;
 429        char *g_buf;
 430        smallint icase;
 431        smallint exiting;
 432        smallint nextrec;
 433        smallint nextfile;
 434        smallint is_f0_split;
 435};
 436struct globals2 {
 437        uint32_t t_info; /* often used */
 438        uint32_t t_tclass;
 439        char *t_string;
 440        int t_lineno;
 441        int t_rollback;
 442
 443        var *intvar[NUM_INTERNAL_VARS]; /* often used */
 444
 445        /* former statics from various functions */
 446        char *split_f0__fstrings;
 447
 448        uint32_t next_token__save_tclass;
 449        uint32_t next_token__save_info;
 450        uint32_t next_token__ltclass;
 451        smallint next_token__concat_inserted;
 452
 453        smallint next_input_file__files_happen;
 454        rstream next_input_file__rsm;
 455
 456        var *evaluate__fnargs;
 457        unsigned evaluate__seed;
 458        regex_t evaluate__sreg;
 459
 460        var ptest__v;
 461
 462        tsplitter exec_builtin__tspl;
 463
 464        /* biggest and least used members go last */
 465        tsplitter fsplitter, rsplitter;
 466};
 467#define G1 (ptr_to_globals[-1])
 468#define G (*(struct globals2 *)ptr_to_globals)
 469/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
 470/*char G1size[sizeof(G1)]; - 0x74 */
 471/*char Gsize[sizeof(G)]; - 0x1c4 */
 472/* Trying to keep most of members accessible with short offsets: */
 473/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
 474#define t_double     (G1.t_double    )
 475#define beginseq     (G1.beginseq    )
 476#define mainseq      (G1.mainseq     )
 477#define endseq       (G1.endseq      )
 478#define seq          (G1.seq         )
 479#define break_ptr    (G1.break_ptr   )
 480#define continue_ptr (G1.continue_ptr)
 481#define iF           (G1.iF          )
 482#define vhash        (G1.vhash       )
 483#define ahash        (G1.ahash       )
 484#define fdhash       (G1.fdhash      )
 485#define fnhash       (G1.fnhash      )
 486#define g_progname   (G1.g_progname  )
 487#define g_lineno     (G1.g_lineno    )
 488#define nfields      (G1.nfields     )
 489#define maxfields    (G1.maxfields   )
 490#define Fields       (G1.Fields      )
 491#define g_cb         (G1.g_cb        )
 492#define g_pos        (G1.g_pos       )
 493#define g_buf        (G1.g_buf       )
 494#define icase        (G1.icase       )
 495#define exiting      (G1.exiting     )
 496#define nextrec      (G1.nextrec     )
 497#define nextfile     (G1.nextfile    )
 498#define is_f0_split  (G1.is_f0_split )
 499#define t_info       (G.t_info      )
 500#define t_tclass     (G.t_tclass    )
 501#define t_string     (G.t_string    )
 502#define t_lineno     (G.t_lineno    )
 503#define t_rollback   (G.t_rollback  )
 504#define intvar       (G.intvar      )
 505#define fsplitter    (G.fsplitter   )
 506#define rsplitter    (G.rsplitter   )
 507#define INIT_G() do { \
 508        SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
 509        G.next_token__ltclass = TC_OPTERM; \
 510        G.evaluate__seed = 1; \
 511} while (0)
 512
 513
 514/* function prototypes */
 515static void handle_special(var *);
 516static node *parse_expr(uint32_t);
 517static void chain_group(void);
 518static var *evaluate(node *, var *);
 519static rstream *next_input_file(void);
 520static int fmt_num(char *, int, const char *, double, int);
 521static int awk_exit(int) NORETURN;
 522
 523/* ---- error handling ---- */
 524
 525static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
 526static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
 527static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
 528static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
 529static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
 530static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
 531static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
 532static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
 533static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
 534static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
 535
 536static void zero_out_var(var *vp)
 537{
 538        memset(vp, 0, sizeof(*vp));
 539}
 540
 541static void syntax_error(const char *message) NORETURN;
 542static void syntax_error(const char *message)
 543{
 544        bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
 545}
 546
 547/* ---- hash stuff ---- */
 548
 549static unsigned hashidx(const char *name)
 550{
 551        unsigned idx = 0;
 552
 553        while (*name)
 554                idx = *name++ + (idx << 6) - idx;
 555        return idx;
 556}
 557
 558/* create new hash */
 559static xhash *hash_init(void)
 560{
 561        xhash *newhash;
 562
 563        newhash = xzalloc(sizeof(*newhash));
 564        newhash->csize = FIRST_PRIME;
 565        newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
 566
 567        return newhash;
 568}
 569
 570/* find item in hash, return ptr to data, NULL if not found */
 571static void *hash_search(xhash *hash, const char *name)
 572{
 573        hash_item *hi;
 574
 575        hi = hash->items[hashidx(name) % hash->csize];
 576        while (hi) {
 577                if (strcmp(hi->name, name) == 0)
 578                        return &hi->data;
 579                hi = hi->next;
 580        }
 581        return NULL;
 582}
 583
 584/* grow hash if it becomes too big */
 585static void hash_rebuild(xhash *hash)
 586{
 587        unsigned newsize, i, idx;
 588        hash_item **newitems, *hi, *thi;
 589
 590        if (hash->nprime == ARRAY_SIZE(PRIMES))
 591                return;
 592
 593        newsize = PRIMES[hash->nprime++];
 594        newitems = xzalloc(newsize * sizeof(newitems[0]));
 595
 596        for (i = 0; i < hash->csize; i++) {
 597                hi = hash->items[i];
 598                while (hi) {
 599                        thi = hi;
 600                        hi = thi->next;
 601                        idx = hashidx(thi->name) % newsize;
 602                        thi->next = newitems[idx];
 603                        newitems[idx] = thi;
 604                }
 605        }
 606
 607        free(hash->items);
 608        hash->csize = newsize;
 609        hash->items = newitems;
 610}
 611
 612/* find item in hash, add it if necessary. Return ptr to data */
 613static void *hash_find(xhash *hash, const char *name)
 614{
 615        hash_item *hi;
 616        unsigned idx;
 617        int l;
 618
 619        hi = hash_search(hash, name);
 620        if (!hi) {
 621                if (++hash->nel / hash->csize > 10)
 622                        hash_rebuild(hash);
 623
 624                l = strlen(name) + 1;
 625                hi = xzalloc(sizeof(*hi) + l);
 626                strcpy(hi->name, name);
 627
 628                idx = hashidx(name) % hash->csize;
 629                hi->next = hash->items[idx];
 630                hash->items[idx] = hi;
 631                hash->glen += l;
 632        }
 633        return &hi->data;
 634}
 635
 636#define findvar(hash, name) ((var*)    hash_find((hash), (name)))
 637#define newvar(name)        ((var*)    hash_find(vhash, (name)))
 638#define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
 639#define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
 640
 641static void hash_remove(xhash *hash, const char *name)
 642{
 643        hash_item *hi, **phi;
 644
 645        phi = &hash->items[hashidx(name) % hash->csize];
 646        while (*phi) {
 647                hi = *phi;
 648                if (strcmp(hi->name, name) == 0) {
 649                        hash->glen -= (strlen(name) + 1);
 650                        hash->nel--;
 651                        *phi = hi->next;
 652                        free(hi);
 653                        break;
 654                }
 655                phi = &hi->next;
 656        }
 657}
 658
 659/* ------ some useful functions ------ */
 660
 661static char *skip_spaces(char *p)
 662{
 663        while (1) {
 664                if (*p == '\\' && p[1] == '\n') {
 665                        p++;
 666                        t_lineno++;
 667                } else if (*p != ' ' && *p != '\t') {
 668                        break;
 669                }
 670                p++;
 671        }
 672        return p;
 673}
 674
 675/* returns old *s, advances *s past word and terminating NUL */
 676static char *nextword(char **s)
 677{
 678        char *p = *s;
 679        while (*(*s)++ != '\0')
 680                continue;
 681        return p;
 682}
 683
 684static char nextchar(char **s)
 685{
 686        char c, *pps;
 687
 688        c = *(*s)++;
 689        pps = *s;
 690        if (c == '\\')
 691                c = bb_process_escape_sequence((const char**)s);
 692        if (c == '\\' && *s == pps) { /* unrecognized \z? */
 693                c = *(*s); /* yes, fetch z */
 694                if (c)
 695                        (*s)++; /* advance unless z = NUL */
 696        }
 697        return c;
 698}
 699
 700static ALWAYS_INLINE int isalnum_(int c)
 701{
 702        return (isalnum(c) || c == '_');
 703}
 704
 705static double my_strtod(char **pp)
 706{
 707        char *cp = *pp;
 708        if (ENABLE_DESKTOP && cp[0] == '0') {
 709                /* Might be hex or octal integer: 0x123abc or 07777 */
 710                char c = (cp[1] | 0x20);
 711                if (c == 'x' || isdigit(cp[1])) {
 712                        unsigned long long ull = strtoull(cp, pp, 0);
 713                        if (c == 'x')
 714                                return ull;
 715                        c = **pp;
 716                        if (!isdigit(c) && c != '.')
 717                                return ull;
 718                        /* else: it may be a floating number. Examples:
 719                         * 009.123 (*pp points to '9')
 720                         * 000.123 (*pp points to '.')
 721                         * fall through to strtod.
 722                         */
 723                }
 724        }
 725        return strtod(cp, pp);
 726}
 727
 728/* -------- working with variables (set/get/copy/etc) -------- */
 729
 730static xhash *iamarray(var *v)
 731{
 732        var *a = v;
 733
 734        while (a->type & VF_CHILD)
 735                a = a->x.parent;
 736
 737        if (!(a->type & VF_ARRAY)) {
 738                a->type |= VF_ARRAY;
 739                a->x.array = hash_init();
 740        }
 741        return a->x.array;
 742}
 743
 744static void clear_array(xhash *array)
 745{
 746        unsigned i;
 747        hash_item *hi, *thi;
 748
 749        for (i = 0; i < array->csize; i++) {
 750                hi = array->items[i];
 751                while (hi) {
 752                        thi = hi;
 753                        hi = hi->next;
 754                        free(thi->data.v.string);
 755                        free(thi);
 756                }
 757                array->items[i] = NULL;
 758        }
 759        array->glen = array->nel = 0;
 760}
 761
 762/* clear a variable */
 763static var *clrvar(var *v)
 764{
 765        if (!(v->type & VF_FSTR))
 766                free(v->string);
 767
 768        v->type &= VF_DONTTOUCH;
 769        v->type |= VF_DIRTY;
 770        v->string = NULL;
 771        return v;
 772}
 773
 774/* assign string value to variable */
 775static var *setvar_p(var *v, char *value)
 776{
 777        clrvar(v);
 778        v->string = value;
 779        handle_special(v);
 780        return v;
 781}
 782
 783/* same as setvar_p but make a copy of string */
 784static var *setvar_s(var *v, const char *value)
 785{
 786        return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
 787}
 788
 789/* same as setvar_s but sets USER flag */
 790static var *setvar_u(var *v, const char *value)
 791{
 792        v = setvar_s(v, value);
 793        v->type |= VF_USER;
 794        return v;
 795}
 796
 797/* set array element to user string */
 798static void setari_u(var *a, int idx, const char *s)
 799{
 800        var *v;
 801
 802        v = findvar(iamarray(a), itoa(idx));
 803        setvar_u(v, s);
 804}
 805
 806/* assign numeric value to variable */
 807static var *setvar_i(var *v, double value)
 808{
 809        clrvar(v);
 810        v->type |= VF_NUMBER;
 811        v->number = value;
 812        handle_special(v);
 813        return v;
 814}
 815
 816static const char *getvar_s(var *v)
 817{
 818        /* if v is numeric and has no cached string, convert it to string */
 819        if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
 820                fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
 821                v->string = xstrdup(g_buf);
 822                v->type |= VF_CACHED;
 823        }
 824        return (v->string == NULL) ? "" : v->string;
 825}
 826
 827static double getvar_i(var *v)
 828{
 829        char *s;
 830
 831        if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
 832                v->number = 0;
 833                s = v->string;
 834                if (s && *s) {
 835                        debug_printf_eval("getvar_i: '%s'->", s);
 836                        v->number = my_strtod(&s);
 837                        debug_printf_eval("%f (s:'%s')\n", v->number, s);
 838                        if (v->type & VF_USER) {
 839                                s = skip_spaces(s);
 840                                if (*s != '\0')
 841                                        v->type &= ~VF_USER;
 842                        }
 843                } else {
 844                        debug_printf_eval("getvar_i: '%s'->zero\n", s);
 845                        v->type &= ~VF_USER;
 846                }
 847                v->type |= VF_CACHED;
 848        }
 849        debug_printf_eval("getvar_i: %f\n", v->number);
 850        return v->number;
 851}
 852
 853/* Used for operands of bitwise ops */
 854static unsigned long getvar_i_int(var *v)
 855{
 856        double d = getvar_i(v);
 857
 858        /* Casting doubles to longs is undefined for values outside
 859         * of target type range. Try to widen it as much as possible */
 860        if (d >= 0)
 861                return (unsigned long)d;
 862        /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
 863        return - (long) (unsigned long) (-d);
 864}
 865
 866static var *copyvar(var *dest, const var *src)
 867{
 868        if (dest != src) {
 869                clrvar(dest);
 870                dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
 871                debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
 872                dest->number = src->number;
 873                if (src->string)
 874                        dest->string = xstrdup(src->string);
 875        }
 876        handle_special(dest);
 877        return dest;
 878}
 879
 880static var *incvar(var *v)
 881{
 882        return setvar_i(v, getvar_i(v) + 1.0);
 883}
 884
 885/* return true if v is number or numeric string */
 886static int is_numeric(var *v)
 887{
 888        getvar_i(v);
 889        return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
 890}
 891
 892/* return 1 when value of v corresponds to true, 0 otherwise */
 893static int istrue(var *v)
 894{
 895        if (is_numeric(v))
 896                return (v->number != 0);
 897        return (v->string && v->string[0]);
 898}
 899
 900/* temporary variables allocator. Last allocated should be first freed */
 901static var *nvalloc(int n)
 902{
 903        nvblock *pb = NULL;
 904        var *v, *r;
 905        int size;
 906
 907        while (g_cb) {
 908                pb = g_cb;
 909                if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
 910                        break;
 911                g_cb = g_cb->next;
 912        }
 913
 914        if (!g_cb) {
 915                size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
 916                g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
 917                g_cb->size = size;
 918                g_cb->pos = g_cb->nv;
 919                g_cb->prev = pb;
 920                /*g_cb->next = NULL; - xzalloc did it */
 921                if (pb)
 922                        pb->next = g_cb;
 923        }
 924
 925        v = r = g_cb->pos;
 926        g_cb->pos += n;
 927
 928        while (v < g_cb->pos) {
 929                v->type = 0;
 930                v->string = NULL;
 931                v++;
 932        }
 933
 934        return r;
 935}
 936
 937static void nvfree(var *v)
 938{
 939        var *p;
 940
 941        if (v < g_cb->nv || v >= g_cb->pos)
 942                syntax_error(EMSG_INTERNAL_ERROR);
 943
 944        for (p = v; p < g_cb->pos; p++) {
 945                if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
 946                        clear_array(iamarray(p));
 947                        free(p->x.array->items);
 948                        free(p->x.array);
 949                }
 950                if (p->type & VF_WALK) {
 951                        walker_list *n;
 952                        walker_list *w = p->x.walker;
 953                        debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
 954                        p->x.walker = NULL;
 955                        while (w) {
 956                                n = w->prev;
 957                                debug_printf_walker(" free(%p)\n", w);
 958                                free(w);
 959                                w = n;
 960                        }
 961                }
 962                clrvar(p);
 963        }
 964
 965        g_cb->pos = v;
 966        while (g_cb->prev && g_cb->pos == g_cb->nv) {
 967                g_cb = g_cb->prev;
 968        }
 969}
 970
 971/* ------- awk program text parsing ------- */
 972
/* Parse the next token at the global position g_pos and store the results
 * in the t_* globals (t_tclass, t_info, t_string, t_double).
 *
 * expected: mask of token classes the caller accepts at this point.
 * Returns the class of the token. If that class is not in 'expected',
 * syntax_error() is called (EMSG_UNEXP_EOS for newline/EOF, otherwise
 * EMSG_UNEXP_TOKEN).
 *
 * The program text is modified in place: string and regexp literals are
 * unescaped into the buffer they were read from, and names are shifted
 * one byte to the left to make room for a terminating NUL.
 */
static uint32_t next_token(uint32_t expected)
{
#define concat_inserted (G.next_token__concat_inserted)
#define save_tclass     (G.next_token__save_tclass)
#define save_info       (G.next_token__save_info)
/* Initialized to TC_OPTERM: */
#define ltclass         (G.next_token__ltclass)

        char *p, *s;
        const char *tl;
        uint32_t tc;
        const uint32_t *ti;

        if (t_rollback) {
                /* rollback_token() was called: hand out the current token again */
                t_rollback = FALSE;

        } else if (concat_inserted) {
                /* the previous call returned an implicit concatenation operator;
                 * now deliver the token that was put aside for it */
                concat_inserted = FALSE;
                t_tclass = save_tclass;
                t_info = save_info;

        } else {
                p = g_pos;
 readnext:
                p = skip_spaces(p);
                g_lineno = t_lineno;
                /* a comment runs up to (not including) end of line / end of text */
                if (*p == '#')
                        while (*p != '\n' && *p != '\0')
                                p++;

                if (*p == '\n')
                        t_lineno++;

                if (*p == '\0') {
                        tc = TC_EOF;

                } else if (*p == '\"') {
                        /* it's a string */
                        /* s is the write cursor, p the read cursor: the literal is
                         * unescaped in place via nextchar() */
                        t_string = s = ++p;
                        while (*p != '\"') {
                                char *pp;
                                if (*p == '\0' || *p == '\n')
                                        syntax_error(EMSG_UNEXP_EOS);
                                pp = p;
                                *s++ = nextchar(&pp);
                                p = pp;
                        }
                        p++;
                        *s = '\0';
                        tc = TC_STRING;

                } else if ((expected & TC_REGEXP) && *p == '/') {
                        /* it's regexp */
                        /* '/' starts a regexp only where the caller expects one */
                        t_string = s = ++p;
                        while (*p != '/') {
                                if (*p == '\0' || *p == '\n')
                                        syntax_error(EMSG_UNEXP_EOS);
                                *s = *p++;
                                if (*s++ == '\\') {
                                        /* backslash: let bb_process_escape_sequence() try
                                         * to translate what follows */
                                        char *pp = p;
                                        s[-1] = bb_process_escape_sequence((const char **)&pp);
                                        /* an escaped backslash is stored as two backslashes */
                                        if (*p == '\\')
                                                *s++ = '\\';
                                        /* pp unchanged: nothing was consumed, so copy the
                                         * following character as is */
                                        if (pp == p)
                                                *s++ = *p++;
                                        else
                                                p = pp;
                                }
                        }
                        p++;
                        *s = '\0';
                        tc = TC_REGEXP;

                } else if (*p == '.' || isdigit(*p)) {
                        /* it's a number */
                        char *pp = p;
                        t_double = my_strtod(&pp);
                        p = pp;
                        /* a '.' right after the parsed number is rejected */
                        if (*p == '.')
                                syntax_error(EMSG_UNEXP_TOKEN);
                        tc = TC_NUMBER;

                } else {
                        /* search for something known */
                        /* tokenlist holds length-prefixed token strings; an NTCC
                         * length byte moves on to the next token class bit.
                         * tokeninfo has one entry per token string. */
                        tl = tokenlist;
                        tc = 0x00000001;
                        ti = tokeninfo;
                        while (*tl) {
                                int l = (unsigned char) *tl++;
                                if (l == (unsigned char) NTCC) {
                                        tc <<= 1;
                                        continue;
                                }
                                /* if token class is expected,
                                 * token matches,
                                 * and it's not a longer word,
                                 */
                                if ((tc & (expected | TC_WORD | TC_NEWLINE))
                                 && strncmp(p, tl, l) == 0
                                 && !((tc & TC_WORD) && isalnum_(p[l]))
                                ) {
                                        /* then this is what we are looking for */
                                        t_info = *ti;
                                        p += l;
                                        goto token_found;
                                }
                                ti++;
                                tl += l;
                        }
                        /* not a known token */

                        /* is it a name? (var/array/function) */
                        if (!isalnum_(*p))
                                syntax_error(EMSG_UNEXP_TOKEN); /* no */
                        /* yes */
                        /* move the name one byte to the left so that a NUL can be
                         * stored after it without touching the next character */
                        t_string = --p;
                        while (isalnum_(*++p)) {
                                p[-1] = *p;
                        }
                        p[-1] = '\0';
                        tc = TC_VARIABLE;
                        /* also consume whitespace between functionname and bracket */
                        if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
                                p = skip_spaces(p);
                        /* what follows the name decides its class: '(' means a
                         * function, '[' (which is consumed) means an array */
                        if (*p == '(') {
                                tc = TC_FUNCTION;
                        } else {
                                if (*p == '[') {
                                        p++;
                                        tc = TC_ARRAY;
                                }
                        }
 token_found: ;
                }
                g_pos = p;

                /* skipping newlines in some cases */
                /* (i.e. when the previous token's class is in TC_NOTERM) */
                if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
                        goto readnext;

                /* insert concatenation operator when needed */
                /* the real token is put aside and delivered by the next call */
                if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
                        concat_inserted = TRUE;
                        save_tclass = tc;
                        save_info = t_info;
                        tc = TC_BINOP;
                        t_info = OC_CONCAT | SS | P(35);
                }

                t_tclass = tc;
        }
        /* remember the class for the newline/concatenation decisions above */
        ltclass = t_tclass;

        /* Are we ready for this? */
        if (!(ltclass & expected))
                syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
                                EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);

        return ltclass;
#undef concat_inserted
#undef save_tclass
#undef save_info
#undef ltclass
}
1140
1141static void rollback_token(void)
1142{
1143        t_rollback = TRUE;
1144}
1145
1146static node *new_node(uint32_t info)
1147{
1148        node *n;
1149
1150        n = xzalloc(sizeof(node));
1151        n->info = info;
1152        n->lineno = g_lineno;
1153        return n;
1154}
1155
1156static void mk_re_node(const char *s, node *n, regex_t *re)
1157{
1158        n->info = OC_REGEXP;
1159        n->l.re = re;
1160        n->r.ire = re + 1;
1161        xregcomp(re, s, REG_EXTENDED);
1162        xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1163}
1164
1165static node *condition(void)
1166{
1167        next_token(TC_SEQSTART);
1168        return parse_expr(TC_SEQTERM);
1169}
1170
1171/* parse expression terminated by given argument, return ptr
1172 * to built subtree. Terminator is eaten by parse_expr */
1173static node *parse_expr(uint32_t iexp)
1174{
1175        node sn;
1176        node *cn = &sn;
1177        node *vn, *glptr;
1178        uint32_t tc, xtc;
1179        var *v;
1180
1181        sn.info = PRIMASK;
1182        sn.r.n = glptr = NULL;
1183        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1184
1185        while (!((tc = next_token(xtc)) & iexp)) {
1186
1187                if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1188                        /* input redirection (<) attached to glptr node */
1189                        cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1190                        cn->a.n = glptr;
1191                        xtc = TC_OPERAND | TC_UOPPRE;
1192                        glptr = NULL;
1193
1194                } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1195                        /* for binary and postfix-unary operators, jump back over
1196                         * previous operators with higher priority */
1197                        vn = cn;
1198                        while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1199                            || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1200                        ) {
1201                                vn = vn->a.n;
1202                        }
1203                        if ((t_info & OPCLSMASK) == OC_TERNARY)
1204                                t_info += P(6);
1205                        cn = vn->a.n->r.n = new_node(t_info);
1206                        cn->a.n = vn->a.n;
1207                        if (tc & TC_BINOP) {
1208                                cn->l.n = vn;
1209                                xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1210                                if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1211                                        /* it's a pipe */
1212                                        next_token(TC_GETLINE);
1213                                        /* give maximum priority to this pipe */
1214                                        cn->info &= ~PRIMASK;
1215                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1216                                }
1217                        } else {
1218                                cn->r.n = vn;
1219                                xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1220                        }
1221                        vn->a.n = cn;
1222
1223                } else {
1224                        /* for operands and prefix-unary operators, attach them
1225                         * to last node */
1226                        vn = cn;
1227                        cn = vn->r.n = new_node(t_info);
1228                        cn->a.n = vn;
1229                        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1230                        if (tc & (TC_OPERAND | TC_REGEXP)) {
1231                                xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1232                                /* one should be very careful with switch on tclass -
1233                                 * only simple tclasses should be used! */
1234                                switch (tc) {
1235                                case TC_VARIABLE:
1236                                case TC_ARRAY:
1237                                        cn->info = OC_VAR;
1238                                        v = hash_search(ahash, t_string);
1239                                        if (v != NULL) {
1240                                                cn->info = OC_FNARG;
1241                                                cn->l.aidx = v->x.aidx;
1242                                        } else {
1243                                                cn->l.v = newvar(t_string);
1244                                        }
1245                                        if (tc & TC_ARRAY) {
1246                                                cn->info |= xS;
1247                                                cn->r.n = parse_expr(TC_ARRTERM);
1248                                        }
1249                                        break;
1250
1251                                case TC_NUMBER:
1252                                case TC_STRING:
1253                                        cn->info = OC_VAR;
1254                                        v = cn->l.v = xzalloc(sizeof(var));
1255                                        if (tc & TC_NUMBER)
1256                                                setvar_i(v, t_double);
1257                                        else
1258                                                setvar_s(v, t_string);
1259                                        break;
1260
1261                                case TC_REGEXP:
1262                                        mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1263                                        break;
1264
1265                                case TC_FUNCTION:
1266                                        cn->info = OC_FUNC;
1267                                        cn->r.f = newfunc(t_string);
1268                                        cn->l.n = condition();
1269                                        break;
1270
1271                                case TC_SEQSTART:
1272                                        cn = vn->r.n = parse_expr(TC_SEQTERM);
1273                                        cn->a.n = vn;
1274                                        break;
1275
1276                                case TC_GETLINE:
1277                                        glptr = cn;
1278                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1279                                        break;
1280
1281                                case TC_BUILTIN:
1282                                        cn->l.n = condition();
1283                                        break;
1284                                }
1285                        }
1286                }
1287        }
1288        return sn.r.n;
1289}
1290
1291/* add node to chain. Return ptr to alloc'd node */
1292static node *chain_node(uint32_t info)
1293{
1294        node *n;
1295
1296        if (!seq->first)
1297                seq->first = seq->last = new_node(0);
1298
1299        if (seq->programname != g_progname) {
1300                seq->programname = g_progname;
1301                n = chain_node(OC_NEWSOURCE);
1302                n->l.new_progname = xstrdup(g_progname);
1303        }
1304
1305        n = seq->last;
1306        n->info = info;
1307        seq->last = n->a.n = new_node(OC_DONE);
1308
1309        return n;
1310}
1311
1312static void chain_expr(uint32_t info)
1313{
1314        node *n;
1315
1316        n = chain_node(info);
1317        n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1318        if (t_tclass & TC_GRPTERM)
1319                rollback_token();
1320}
1321
1322static node *chain_loop(node *nn)
1323{
1324        node *n, *n2, *save_brk, *save_cont;
1325
1326        save_brk = break_ptr;
1327        save_cont = continue_ptr;
1328
1329        n = chain_node(OC_BR | Vx);
1330        continue_ptr = new_node(OC_EXEC);
1331        break_ptr = new_node(OC_EXEC);
1332        chain_group();
1333        n2 = chain_node(OC_EXEC | Vx);
1334        n2->l.n = nn;
1335        n2->a.n = n;
1336        continue_ptr->a.n = n2;
1337        break_ptr->a.n = n->r.n = seq->last;
1338
1339        continue_ptr = save_cont;
1340        break_ptr = save_brk;
1341
1342        return n;
1343}
1344
1345/* parse group and attach it to chain */
1346static void chain_group(void)
1347{
1348        uint32_t c;
1349        node *n, *n2, *n3;
1350
1351        do {
1352                c = next_token(TC_GRPSEQ);
1353        } while (c & TC_NEWLINE);
1354
1355        if (c & TC_GRPSTART) {
1356                while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1357                        if (t_tclass & TC_NEWLINE)
1358                                continue;
1359                        rollback_token();
1360                        chain_group();
1361                }
1362        } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1363                rollback_token();
1364                chain_expr(OC_EXEC | Vx);
1365        } else {                                                /* TC_STATEMNT */
1366                switch (t_info & OPCLSMASK) {
1367                case ST_IF:
1368                        n = chain_node(OC_BR | Vx);
1369                        n->l.n = condition();
1370                        chain_group();
1371                        n2 = chain_node(OC_EXEC);
1372                        n->r.n = seq->last;
1373                        if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1374                                chain_group();
1375                                n2->a.n = seq->last;
1376                        } else {
1377                                rollback_token();
1378                        }
1379                        break;
1380
1381                case ST_WHILE:
1382                        n2 = condition();
1383                        n = chain_loop(NULL);
1384                        n->l.n = n2;
1385                        break;
1386
1387                case ST_DO:
1388                        n2 = chain_node(OC_EXEC);
1389                        n = chain_loop(NULL);
1390                        n2->a.n = n->a.n;
1391                        next_token(TC_WHILE);
1392                        n->l.n = condition();
1393                        break;
1394
1395                case ST_FOR:
1396                        next_token(TC_SEQSTART);
1397                        n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1398                        if (t_tclass & TC_SEQTERM) {    /* for-in */
1399                                if ((n2->info & OPCLSMASK) != OC_IN)
1400                                        syntax_error(EMSG_UNEXP_TOKEN);
1401                                n = chain_node(OC_WALKINIT | VV);
1402                                n->l.n = n2->l.n;
1403                                n->r.n = n2->r.n;
1404                                n = chain_loop(NULL);
1405                                n->info = OC_WALKNEXT | Vx;
1406                                n->l.n = n2->l.n;
1407                        } else {                        /* for (;;) */
1408                                n = chain_node(OC_EXEC | Vx);
1409                                n->l.n = n2;
1410                                n2 = parse_expr(TC_SEMICOL);
1411                                n3 = parse_expr(TC_SEQTERM);
1412                                n = chain_loop(n3);
1413                                n->l.n = n2;
1414                                if (!n2)
1415                                        n->info = OC_EXEC;
1416                        }
1417                        break;
1418
1419                case OC_PRINT:
1420                case OC_PRINTF:
1421                        n = chain_node(t_info);
1422                        n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1423                        if (t_tclass & TC_OUTRDR) {
1424                                n->info |= t_info;
1425                                n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1426                        }
1427                        if (t_tclass & TC_GRPTERM)
1428                                rollback_token();
1429                        break;
1430
1431                case OC_BREAK:
1432                        n = chain_node(OC_EXEC);
1433                        n->a.n = break_ptr;
1434                        break;
1435
1436                case OC_CONTINUE:
1437                        n = chain_node(OC_EXEC);
1438                        n->a.n = continue_ptr;
1439                        break;
1440
1441                /* delete, next, nextfile, return, exit */
1442                default:
1443                        chain_expr(t_info);
1444                }
1445        }
1446}
1447
1448static void parse_program(char *p)
1449{
1450        uint32_t tclass;
1451        node *cn;
1452        func *f;
1453        var *v;
1454
1455        g_pos = p;
1456        t_lineno = 1;
1457        while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1458                        TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1459
1460                if (tclass & TC_OPTERM)
1461                        continue;
1462
1463                seq = &mainseq;
1464                if (tclass & TC_BEGIN) {
1465                        seq = &beginseq;
1466                        chain_group();
1467
1468                } else if (tclass & TC_END) {
1469                        seq = &endseq;
1470                        chain_group();
1471
1472                } else if (tclass & TC_FUNCDECL) {
1473                        next_token(TC_FUNCTION);
1474                        g_pos++;
1475                        f = newfunc(t_string);
1476                        f->body.first = NULL;
1477                        f->nargs = 0;
1478                        while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1479                                v = findvar(ahash, t_string);
1480                                v->x.aidx = f->nargs++;
1481
1482                                if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1483                                        break;
1484                        }
1485                        seq = &f->body;
1486                        chain_group();
1487                        clear_array(ahash);
1488
1489                } else if (tclass & TC_OPSEQ) {
1490                        rollback_token();
1491                        cn = chain_node(OC_TEST);
1492                        cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1493                        if (t_tclass & TC_GRPSTART) {
1494                                rollback_token();
1495                                chain_group();
1496                        } else {
1497                                chain_node(OC_PRINT);
1498                        }
1499                        cn->r.n = mainseq.last;
1500
1501                } else /* if (tclass & TC_GRPSTART) */ {
1502                        rollback_token();
1503                        chain_group();
1504                }
1505        }
1506}
1507
1508
1509/* -------- program execution part -------- */
1510
1511static node *mk_splitter(const char *s, tsplitter *spl)
1512{
1513        regex_t *re, *ire;
1514        node *n;
1515
1516        re = &spl->re[0];
1517        ire = &spl->re[1];
1518        n = &spl->n;
1519        if ((n->info & OPCLSMASK) == OC_REGEXP) {
1520                regfree(re);
1521                regfree(ire); // TODO: nuke ire, use re+1?
1522        }
1523        if (s[0] && s[1]) { /* strlen(s) > 1 */
1524                mk_re_node(s, n, re);
1525        } else {
1526                n->info = (uint32_t) s[0];
1527        }
1528
1529        return n;
1530}
1531
1532/* use node as a regular expression. Supplied with node ptr and regex_t
1533 * storage space. Return ptr to regex (if result points to preg, it should
1534 * be later regfree'd manually
1535 */
1536static regex_t *as_regex(node *op, regex_t *preg)
1537{
1538        int cflags;
1539        var *v;
1540        const char *s;
1541
1542        if ((op->info & OPCLSMASK) == OC_REGEXP) {
1543                return icase ? op->r.ire : op->l.re;
1544        }
1545        v = nvalloc(1);
1546        s = getvar_s(evaluate(op, v));
1547
1548        cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1549        /* Testcase where REG_EXTENDED fails (unpaired '{'):
1550         * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1551         * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1552         * (maybe gsub is not supposed to use REG_EXTENDED?).
1553         */
1554        if (regcomp(preg, s, cflags)) {
1555                cflags &= ~REG_EXTENDED;
1556                xregcomp(preg, s, cflags);
1557        }
1558        nvfree(v);
1559        return preg;
1560}
1561
/* Gradually growing buffer: make sure b can hold more than n bytes.
 * The buffer is reallocated even when n equals the old size, so there
 * is always at least one extra allocated byte. *size is updated to the
 * new capacity whenever a reallocation happens.
 */
static char* qrealloc(char *b, int n, int *size)
{
        if (b != NULL && n < *size)
                return b;       /* still fits */

        *size = n + (n>>1) + 80;
        return xrealloc(b, *size);
}
1574
1575/* resize field storage space */
1576static void fsrealloc(int size)
1577{
1578        int i;
1579
1580        if (size >= maxfields) {
1581                i = maxfields;
1582                maxfields = size + 16;
1583                Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1584                for (; i < maxfields; i++) {
1585                        Fields[i].type = VF_SPECIAL;
1586                        Fields[i].string = NULL;
1587                }
1588        }
1589        /* if size < nfields, clear extra field variables */
1590        for (i = size; i < nfields; i++) {
1591                clrvar(Fields + i);
1592        }
1593        nfields = size;
1594}
1595
/* Split string s into words according to splitter node spl.
 *
 * A buffer is allocated and stored in *slist; the words are placed into
 * it, each terminated by NUL. Returns the number of words.
 *
 * spl->info selects the mode:
 *  - OC_REGEXP node: separators are matches of the regexp;
 *  - character 0: every character becomes a word of its own;
 *  - character ' ': words are runs of non-whitespace characters;
 *  - any other character: that character is the separator.
 * When RS is the empty string, newline also separates words in the
 * regexp and single-character modes.
 */
static int awk_split(const char *s, node *spl, char **slist)
{
        int l, n;
        char c[4];
        char *s1;
        regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...

        /* in worst case, each char would be a separate field */
        *slist = s1 = xzalloc(strlen(s) * 2 + 3);
        strcpy(s1, s);

        /* c[0], c[1]: the separator character (two slots for the icase
         * variants, see below); c+2: "" or "\n", the extra separator */
        c[0] = c[1] = (char)spl->info;
        c[2] = c[3] = '\0';
        if (*getvar_s(intvar[RS]) == '\0')
                c[2] = '\n';

        n = 0;
        if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
                if (!*s)
                        return n; /* "": zero fields */
                n++; /* at least one field will be there */
                do {
                        l = strcspn(s, c+2); /* len till next NUL or \n */
                        /* a match counts only if it starts no later than that */
                        if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
                         && pmatch[0].rm_so <= l
                        ) {
                                l = pmatch[0].rm_so;
                                /* empty match at the very start: take one character
                                 * so that the loop makes progress */
                                if (pmatch[0].rm_eo == 0) {
                                        l++;
                                        pmatch[0].rm_eo++;
                                }
                                n++; /* we saw yet another delimiter */
                        } else {
                                /* no usable match: the word runs up to NUL/newline,
                                 * and a newline, if present, is consumed as well */
                                pmatch[0].rm_eo = l;
                                if (s[l])
                                        pmatch[0].rm_eo++;
                        }
                        /* from here on: l = length of the word,
                         * pmatch[0].rm_eo = how much of s this round consumes */
                        memcpy(s1, s, l);
                        /* make sure we remove *all* of the separator chars */
                        do {
                                s1[l] = '\0';
                        } while (++l < pmatch[0].rm_eo);
                        /* NOTE(review): presumably advances s1 past the word just
                         * stored - confirm against nextword() */
                        nextword(&s1);
                        s += pmatch[0].rm_eo;
                } while (*s);
                return n;
        }
        if (c[0] == '\0') {  /* null split */
                /* each character is followed by its own NUL */
                while (*s) {
                        *s1++ = *s++;
                        *s1++ = '\0';
                        n++;
                }
                return n;
        }
        if (c[0] != ' ') {  /* single-character split */
                /* ignoring case: accept both variants of the separator */
                if (icase) {
                        c[0] = toupper(c[0]);
                        c[1] = tolower(c[1]);
                }
                /* s1 holds a copy of s (strcpy above); separators are replaced
                 * by NULs in place */
                if (*s1)
                        n++;
                while ((s1 = strpbrk(s1, c)) != NULL) {
                        *s1++ = '\0';
                        n++;
                }
                return n;
        }
        /* space split */
        while (*s) {
                s = skip_whitespace(s);
                if (!*s)
                        break;
                n++;
                while (*s && !isspace(*s))
                        *s1++ = *s++;
                *s1++ = '\0';
        }
        return n;
}
1676
1677static void split_f0(void)
1678{
1679/* static char *fstrings; */
1680#define fstrings (G.split_f0__fstrings)
1681
1682        int i, n;
1683        char *s;
1684
1685        if (is_f0_split)
1686                return;
1687
1688        is_f0_split = TRUE;
1689        free(fstrings);
1690        fsrealloc(0);
1691        n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1692        fsrealloc(n);
1693        s = fstrings;
1694        for (i = 0; i < n; i++) {
1695                Fields[i].string = nextword(&s);
1696                Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1697        }
1698
1699        /* set NF manually to avoid side effects */
1700        clrvar(intvar[NF]);
1701        intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1702        intvar[NF]->number = nfields;
1703#undef fstrings
1704}
1705
1706/* perform additional actions when some internal variables changed */
1707static void handle_special(var *v)
1708{
1709        int n;
1710        char *b;
1711        const char *sep, *s;
1712        int sl, l, len, i, bsize;
1713
1714        if (!(v->type & VF_SPECIAL))
1715                return;
1716
1717        if (v == intvar[NF]) {
1718                n = (int)getvar_i(v);
1719                fsrealloc(n);
1720
1721                /* recalculate $0 */
1722                sep = getvar_s(intvar[OFS]);
1723                sl = strlen(sep);
1724                b = NULL;
1725                len = 0;
1726                for (i = 0; i < n; i++) {
1727                        s = getvar_s(&Fields[i]);
1728                        l = strlen(s);
1729                        if (b) {
1730                                memcpy(b+len, sep, sl);
1731                                len += sl;
1732                        }
1733                        b = qrealloc(b, len+l+sl, &bsize);
1734                        memcpy(b+len, s, l);
1735                        len += l;
1736                }
1737                if (b)
1738                        b[len] = '\0';
1739                setvar_p(intvar[F0], b);
1740                is_f0_split = TRUE;
1741
1742        } else if (v == intvar[F0]) {
1743                is_f0_split = FALSE;
1744
1745        } else if (v == intvar[FS]) {
1746                mk_splitter(getvar_s(v), &fsplitter);
1747
1748        } else if (v == intvar[RS]) {
1749                mk_splitter(getvar_s(v), &rsplitter);
1750
1751        } else if (v == intvar[IGNORECASE]) {
1752                icase = istrue(v);
1753
1754        } else {                                /* $n */
1755                n = getvar_i(intvar[NF]);
1756                setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1757                /* right here v is invalid. Just to note... */
1758        }
1759}
1760
1761/* step through func/builtin/etc arguments */
1762static node *nextarg(node **pn)
1763{
1764        node *n;
1765
1766        n = *pn;
1767        if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1768                *pn = n->r.n;
1769                n = n->l.n;
1770        } else {
1771                *pn = NULL;
1772        }
1773        return n;
1774}
1775
1776static void hashwalk_init(var *v, xhash *array)
1777{
1778        hash_item *hi;
1779        unsigned i;
1780        walker_list *w;
1781        walker_list *prev_walker;
1782
1783        if (v->type & VF_WALK) {
1784                prev_walker = v->x.walker;
1785        } else {
1786                v->type |= VF_WALK;
1787                prev_walker = NULL;
1788        }
1789        debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1790
1791        w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1792        debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1793        w->cur = w->end = w->wbuf;
1794        w->prev = prev_walker;
1795        for (i = 0; i < array->csize; i++) {
1796                hi = array->items[i];
1797                while (hi) {
1798                        strcpy(w->end, hi->name);
1799                        nextword(&w->end);
1800                        hi = hi->next;
1801                }
1802        }
1803}
1804
1805static int hashwalk_next(var *v)
1806{
1807        walker_list *w = v->x.walker;
1808
1809        if (w->cur >= w->end) {
1810                walker_list *prev_walker = w->prev;
1811
1812                debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1813                free(w);
1814                v->x.walker = prev_walker;
1815                return FALSE;
1816        }
1817
1818        setvar_s(v, nextword(&w->cur));
1819        return TRUE;
1820}
1821
1822/* evaluate node, return 1 when result is true, 0 otherwise */
1823static int ptest(node *pattern)
1824{
1825        /* ptest__v is "static": to save stack space? */
1826        return istrue(evaluate(pattern, &G.ptest__v));
1827}
1828
1829/* read next record from stream rsm into a variable v */
/* Details:
 *  - Data is read with safe_read() from fileno(rsm->F) into rsm->buffer.
 *    rsm->adv, rsm->pos and rsm->size are loaded on entry and stored back
 *    on exit, so bytes that were read but not consumed carry over to the
 *    next call.
 *  - The separator comes from rsplitter: a regex node, a single character
 *    (c != '\0'), or, when c == '\0', a run of blank lines ("\n\n").
 *  - On success v receives the record text (flagged VF_USER) and RT
 *    receives the separator text that ended the record.
 * Returns 1 if a record was stored, 0 if no data was left, and -1 if
 * safe_read() returned a negative count (ERRNO is then set from errno).
 */
1830static int awk_getline(rstream *rsm, var *v)
1831{
1832        char *b;
1833        regmatch_t pmatch[2];
1834        int size, a, p, pp = 0;
1835        int fd, so, eo, r, rp;
1836        char c, *m, *s;
1837
1838        debug_printf_eval("entered %s()\n", __func__);
1839
1840        /* we're using our own buffer since we need access to accumulating
1841         * characters
1842         */
1843        fd = fileno(rsm->F);
        /* m: buffer, a: offset of the unconsumed data in it,
         * p: number of unconsumed bytes, size: buffer size as kept by qrealloc */
1844        m = rsm->buffer;
1845        a = rsm->adv;
1846        p = rsm->pos;
1847        size = rsm->size;
        /* NOTE(review): the low byte of the splitter's info appears to hold
         * the separator character for the non-regex case - confirm in mk_splitter */
1848        c = (char) rsplitter.n.info;
        /* rp: offset in b where the record text starts (only moved in "\n\n" mode) */
1849        rp = 0;
1850
1851        if (!m)
1852                m = qrealloc(m, 256, &size);
1853
1854        do {
1855                b = m + a;
                /* so/eo: start/end offset of the separator inside b;
                 * default is "none found": record runs to the end of the data */
1856                so = eo = p;
1857                r = 1;
1858                if (p > 0) {
1859                        if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1860                                if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1861                                                        b, 1, pmatch, 0) == 0) {
1862                                        so = pmatch[0].rm_so;
1863                                        eo = pmatch[0].rm_eo;
                                        /* a match that ends exactly at the end of the
                                         * data is not accepted yet; more input is read first */
1864                                        if (b[eo] != '\0')
1865                                                break;
1866                                }
1867                        } else if (c != '\0') {
                                /* only the bytes added since the previous pass (from pp) are searched;
                                 * an embedded NUL byte also ends the record */
1868                                s = strchr(b+pp, c);
1869                                if (!s)
1870                                        s = memchr(b+pp, '\0', p - pp);
1871                                if (s) {
1872                                        so = eo = s-b;
1873                                        eo++;
1874                                        break;
1875                                }
1876                        } else {
                                /* skip newlines in front of the record, then look for an empty line */
1877                                while (b[rp] == '\n')
1878                                        rp++;
1879                                s = strstr(b+rp, "\n\n");
1880                                if (s) {
1881                                        so = eo = s-b;
1882                                        while (b[eo] == '\n')
1883                                                eo++;
                                        /* as in the regex case: a newline run that reaches the
                                         * end of the data is not accepted yet */
1884                                        if (b[eo] != '\0')
1885                                                break;
1886                                }
1887                        }
1888                }
1889
                /* no complete record yet: move the unconsumed data (and its
                 * terminating NUL, hence p+1) to the start of the buffer */
1890                if (a > 0) {
1891                        memmove(m, m+a, p+1);
1892                        b = m;
1893                        a = 0;
1894                }
1895
1896                m = qrealloc(m, a+p+128, &size);
1897                b = m + a;
1898                pp = p;
                /* append new input, leaving one byte for the NUL stored below */
1899                p += safe_read(fd, b+p, size-p-1);
                /* p shrank: safe_read() returned a negative count */
1900                if (p < pp) {
1901                        p = 0;
1902                        r = 0;
1903                        setvar_i(intvar[ERRNO], errno);
1904                }
1905                b[p] = '\0';
1906
        /* loop again only if the read delivered new bytes */
1907        } while (p > pp);
1908
1909        if (p == 0) {
                /* nothing to return: r becomes 0, or -1 after a failed read */
1910                r--;
1911        } else {
                /* temporarily NUL-terminate the record, then the separator,
                 * to copy each one out; the saved byte is restored afterwards */
1912                c = b[so]; b[so] = '\0';
1913                setvar_s(v, b+rp);
1914                v->type |= VF_USER;
1915                b[so] = c;
1916                c = b[eo]; b[eo] = '\0';
1917                setvar_s(intvar[RT], b+so);
1918                b[eo] = c;
1919        }
1920
        /* save the buffer state; eo bytes (record + separator) were consumed */
1921        rsm->buffer = m;
1922        rsm->adv = a + eo;
1923        rsm->pos = p - eo;
1924        rsm->size = size;
1925
1926        debug_printf_eval("returning from %s(): %d\n", __func__, r);
1927
1928        return r;
1929}
1930
/* Format the number n into b (at most size bytes, snprintf semantics).
 *
 * If int_as_int is nonzero and n compares equal to its int conversion,
 * n is printed with "%d" and format is not used. Otherwise the last
 * character of format selects how n is handed to snprintf():
 *   d i o u x X      n is converted to int
 *   e E f F g G a A  n is passed as a double
 * Any other final character is reported via syntax_error(EMSG_INV_FMT).
 *
 * Returns the snprintf() result, i.e. the length the full output needs,
 * which can be >= size when the output was truncated.
 */
static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
{
	int r = 0;
	char c;
	const char *s = format;

	if (int_as_int && n == (int)n) {
		r = snprintf(b, size, "%d", (int)n);
	} else {
		/* walk to the last character of format: the conversion specifier
		 * (c ends up '\0' only for an empty format) */
		do { c = *s; } while (c && *++s);
		if (strchr("diouxX", c)) {
			r = snprintf(b, size, format, (int)n);
		} else if (strchr("eEfFgGaA", c)) {
			/* F, a and A are standard floating conversions as well */
			r = snprintf(b, size, format, n);
		} else {
			syntax_error(EMSG_INV_FMT);
		}
	}
	return r;
}
1951
1952/* formatted output into an allocated buffer, return ptr to buffer */
1953static char *awk_printf(node *n)
1954{
1955        char *b = NULL;
1956        char *fmt, *s, *f;
1957        const char *s1;
1958        int i, j, incr, bsize;
1959        char c, c1;
1960        var *v, *arg;
1961
1962        v = nvalloc(1);
1963        fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1964
1965        i = 0;
1966        while (*f) {
1967                s = f;
1968                while (*f && (*f != '%' || *++f == '%'))
1969                        f++;
1970                while (*f && !isalpha(*f)) {
1971                        if (*f == '*')
1972                                syntax_error("%*x formats are not supported");
1973                        f++;
1974                }
1975
1976                incr = (f - s) + MAXVARFMT;
1977                b = qrealloc(b, incr + i, &bsize);
1978                c = *f;
1979                if (c != '\0')
1980                        f++;
1981                c1 = *f;
1982                *f = '\0';
1983                arg = evaluate(nextarg(&n), v);
1984
1985                j = i;
1986                if (c == 'c' || !c) {
1987                        i += sprintf(b+i, s, is_numeric(arg) ?
1988                                        (char)getvar_i(arg) : *getvar_s(arg));
1989                } else if (c == 's') {
1990                        s1 = getvar_s(arg);
1991                        b = qrealloc(b, incr+i+strlen(s1), &bsize);
1992                        i += sprintf(b+i, s, s1);
1993                } else {
1994                        i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1995                }
1996                *f = c1;
1997
1998                /* if there was an error while sprintf, return value is negative */
1999                if (i < j)
2000                        i = j;
2001        }
2002
2003        free(fmt);
2004        nvfree(v);
2005        b = xrealloc(b, i + 1);
2006        b[i] = '\0';
2007        return b;
2008}
2009
2010/* Common substitution routine.
2011 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2012 * store result into (dest), return number of substitutions.
2013 * If nm = 0, replace all matches.
2014 * If src or dst is NULL, use $0.
2015 * If subexp != 0, enable subexpression matching (\1-\9).
2016 */
/* Notes on the replacement text (repl):
 *  - '&' stands for the whole match; with subexp set, a digit stands for
 *    the corresponding subexpression.
 *  - nbs counts the backslashes directly in front of the current
 *    character. On '&' (or a digit with subexp) part of what was just
 *    copied is rewound: (nbs + 3) >> 1 bytes. Then either the literal
 *    character is emitted (odd count) or the matched text is inserted
 *    (even count). For digits the count is bumped by one first, so a digit
 *    needs an odd number of backslashes to act as a back-reference.
 * The value returned is match_no, the number of matches that were seen.
 */
2017static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2018{
2019        char *resbuf;
2020        const char *sp;
2021        int match_no, residx, replen, resbufsize;
2022        int regexec_flags;
2023        regmatch_t pmatch[10];
2024        regex_t sreg, *regex;
2025
2026        resbuf = NULL;
2027        residx = 0;
2028        match_no = 0;
2029        regexec_flags = 0;
        /* sreg is scratch space for as_regex(); if the returned regex is
         * &sreg it is released with regfree() at the end */
2030        regex = as_regex(rn, &sreg);
2031        sp = getvar_s(src ? src : intvar[F0]);
2032        replen = strlen(repl);
        /* sp: unprocessed rest of the source; residx: write position in resbuf */
2033        while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2034                int so = pmatch[0].rm_so;
2035                int eo = pmatch[0].rm_eo;
2036
2037                //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
                /* copy everything up to the end of the match verbatim first */
2038                resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2039                memcpy(resbuf + residx, sp, eo);
2040                residx += eo;
                /* nm == 0: every match qualifies; otherwise only the nm'th one
                 * is reached here, because the loop stops right after it */
2041                if (++match_no >= nm) {
2042                        const char *s;
2043                        int nbs;
2044
2045                        /* replace */
                        /* step back over the matched text that was just copied */
2046                        residx -= (eo - so);
2047                        nbs = 0;
2048                        for (s = repl; *s; s++) {
2049                                char c = resbuf[residx++] = *s;
2050                                if (c == '\\') {
2051                                        nbs++;
2052                                        continue;
2053                                }
2054                                if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2055                                        int j;
                                        /* rewind over the character and part of its backslashes */
2056                                        residx -= ((nbs + 3) >> 1);
                                        /* j: index into pmatch[], 0 is the whole match */
2057                                        j = 0;
2058                                        if (c != '&') {
2059                                                j = c - '0';
2060                                                nbs++;
2061                                        }
2062                                        if (nbs % 2) {
                                                /* escaped: keep the character itself */
2063                                                resbuf[residx++] = c;
2064                                        } else {
                                                /* insert the text of match/subexpression j */
2065                                                int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2066                                                resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2067                                                memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2068                                                residx += n;
2069                                        }
2070                                }
2071                                nbs = 0;
2072                        }
2073                }
2074
                /* later searches no longer start at the beginning of the line */
2075                regexec_flags = REG_NOTBOL;
2076                sp += eo;
2077                if (match_no == nm)
2078                        break;
2079                if (eo == so) {
2080                        /* Empty match (e.g. "b*" will match anywhere).
2081                         * Advance by one char. */
2082//BUG (bug 1333):
2083//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2084//... and will erroneously match "b" even though it is NOT at the word start.
2085//we need REG_NOTBOW but it does not exist...
2086//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2087//it should be able to do it correctly.
2088                        /* Subtle: this is safe only because
2089                         * qrealloc allocated at least one extra byte */
2090                        resbuf[residx] = *sp;
                        /* end of source reached: the byte just stored is the terminator */
2091                        if (*sp == '\0')
2092                                goto ret;
2093                        sp++;
2094                        residx++;
2095                }
2096        }
2097
        /* append the rest of the source, including its terminating NUL */
2098        resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2099        strcpy(resbuf + residx, sp);
2100 ret:
2101        //bb_error_msg("end sp:'%s'%p", sp,sp);
        /* resbuf is handed over to the destination variable */
2102        setvar_p(dest ? dest : intvar[F0], resbuf);
2103        if (regex == &sreg)
2104                regfree(regex);
2105        return match_no;
2106}
2107
/* Convert a date string of the form "YYYY MM DD HH MM SS [DST]" to the
 * value mktime() yields for it. The DST field is optional and defaults
 * to -1 (unknown).
 * Returns -1 if fewer than six fields could be read, if the month is
 * below 1 or if the year is below 1900. */
static NOINLINE int do_mktime(const char *ds)
{
	struct tm stamp;
	int nconv;

	/* the remaining members are deliberately left unset (not needed) */
	stamp.tm_isdst = -1; /* default is unknown */

	/* manpage of mktime says these fields are ints,
	 * so we can sscanf stuff directly into them */
	nconv = sscanf(ds, "%u %u %u %u %u %u %d",
		&stamp.tm_year, &stamp.tm_mon, &stamp.tm_mday,
		&stamp.tm_hour, &stamp.tm_min, &stamp.tm_sec,
		&stamp.tm_isdst);

	if (nconv < 6)
		return -1;
	if ((unsigned)stamp.tm_mon < 1)
		return -1;
	if ((unsigned)stamp.tm_year < 1900)
		return -1;

	/* struct tm counts months from 0 and years from 1900 */
	stamp.tm_mon--;
	stamp.tm_year -= 1900;

	return mktime(&stamp);
}
2135
/* Execute the builtin function described by node op and store its result
 * in res, which is also returned.
 * Up to four arguments are collected from the list at op->l.n:
 *   an[i] - the argument node
 *   av[i] - its evaluated value (only if the flag bits in op->info ask for it)
 *   as[i] - its string value (likewise)
 * The builtin itself is selected by op->info & OPNMASK.
 */
2136static NOINLINE var *exec_builtin(node *op, var *res)
2137{
2138#define tspl (G.exec_builtin__tspl)
2139
2140        var *tv;
2141        node *an[4];
2142        var *av[4];
2143        const char *as[4];
2144        regmatch_t pmatch[2];
2145        regex_t sreg, *re;
2146        node *spl;
2147        uint32_t isr, info;
2148        int nargs;
2149        time_t tt;
2150        int i, l, ll, n;
2151
        /* four temporaries, one per possible argument; released before return */
2152        tv = nvalloc(4);
2153        isr = info = op->info;
2154        op = op->l.n;
2155
2156        av[2] = av[3] = NULL;
        /* isr is shifted right once per argument, so the two masks below
         * test a different pair of info bits for each argument position */
2157        for (i = 0; i < 4 && op; i++) {
2158                an[i] = nextarg(&op);
2159                if (isr & 0x09000000)
2160                        av[i] = evaluate(an[i], &tv[i]);
2161                if (isr & 0x08000000)
2162                        as[i] = getvar_s(av[i]);
2163                isr >>= 1;
2164        }
2165
2166        nargs = i;
        /* the top two bits of info give the minimum number of arguments */
2167        if ((uint32_t)nargs < (info >> 30))
2168                syntax_error(EMSG_TOO_FEW_ARGS);
2169
2170        info &= OPNMASK;
2171        switch (info) {
2172
        /* atan2 of the first two arguments (needs ENABLE_FEATURE_AWK_LIBM) */
2173        case B_a2:
2174                if (ENABLE_FEATURE_AWK_LIBM)
2175                        setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2176                else
2177                        syntax_error(EMSG_NO_MATH);
2178                break;
2179
        /* split string as[0] into array av[1] (elements 1..n); result is n.
         * The optional third argument is the separator: a regexp node is used
         * as is, anything else is turned into a splitter; default is fsplitter */
2180        case B_sp: {
2181                char *s, *s1;
2182
2183                if (nargs > 2) {
2184                        spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2185                                an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2186                } else {
2187                        spl = &fsplitter.n;
2188                }
2189
2190                n = awk_split(as[0], spl, &s);
                /* keep the start of the buffer: nextword() advances s */
2191                s1 = s;
2192                clear_array(iamarray(av[1]));
2193                for (i = 1; i <= n; i++)
2194                        setari_u(av[1], i, nextword(&s));
2195                free(s1);
2196                setvar_i(res, n);
2197                break;
2198        }
2199
        /* substring of as[0]: av[1] is the 1-based start (clamped to 0..length),
         * av[2] the optional length (default: up to the end, negative -> 0) */
2200        case B_ss: {
2201                char *s;
2202
2203                l = strlen(as[0]);
2204                i = getvar_i(av[1]) - 1;
2205                if (i > l)
2206                        i = l;
2207                if (i < 0)
2208                        i = 0;
2209                n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2210                if (n < 0)
2211                        n = 0;
2212                s = xstrndup(as[0]+i, n);
2213                setvar_p(res, s);
2214                break;
2215        }
2216
2217        /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2218         * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
        /* bitwise AND */
2219        case B_an:
2220                setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2221                break;
2222
        /* bitwise complement */
2223        case B_co:
2224                setvar_i(res, ~getvar_i_int(av[0]));
2225                break;
2226
        /* left shift */
2227        case B_ls:
2228                setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2229                break;
2230
        /* bitwise OR */
2231        case B_or:
2232                setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2233                break;
2234
        /* right shift */
2235        case B_rs:
2236                setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2237                break;
2238
        /* bitwise XOR */
2239        case B_xo:
2240                setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2241                break;
2242
        /* copy of as[0] with the ASCII letters lower-cased (B_lo) or
         * upper-cased (B_up); all other bytes are left alone */
2243        case B_lo:
2244        case B_up: {
2245                char *s, *s1;
2246                s1 = s = xstrdup(as[0]);
2247                while (*s1) {
2248                        //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
                        /* OR-ing 0x20 folds 'A'..'Z' onto 'a'..'z', so one range check covers both cases */
2249                        if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2250                                *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2251                        s1++;
2252                }
2253                setvar_p(res, s);
2254                break;
2255        }
2256
        /* 1-based position of the first occurrence of as[1] in as[0], or 0
         * (also 0 when as[1] is empty or longer than as[0]) */
2257        case B_ix:
2258                n = 0;
2259                ll = strlen(as[1]);
                /* l: last start offset at which as[1] could still fit */
2260                l = strlen(as[0]) - ll;
2261                if (ll > 0 && l >= 0) {
2262                        if (!icase) {
2263                                char *s = strstr(as[0], as[1]);
2264                                if (s)
2265                                        n = (s - as[0]) + 1;
2266                        } else {
2267                                /* this piece of code is terribly slow and
2268                                 * really should be rewritten
2269                                 */
2270                                for (i = 0; i <= l; i++) {
2271                                        if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2272                                                n = i+1;
2273                                                break;
2274                                        }
2275                                }
2276                        }
2277                }
2278                setvar_i(res, n);
2279                break;
2280
        /* format a time with strftime(): as[0] is the optional format,
         * av[1] the optional timestamp (default: current time) */
2281        case B_ti:
2282                if (nargs > 1)
2283                        tt = getvar_i(av[1]);
2284                else
2285                        time(&tt);
2286                //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2287                i = strftime(g_buf, MAXVARFMT,
2288                        ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2289                        localtime(&tt));
2290                g_buf[i] = '\0';
2291                setvar_s(res, g_buf);
2292                break;
2293
        /* convert the date string as[0] to a timestamp */
2294        case B_mt:
2295                setvar_i(res, do_mktime(as[0]));
2296                break;
2297
        /* match regex an[1] against as[0]; sets RSTART and RLENGTH.
         * On a match the offsets become 1-based; without a match
         * RSTART is 0 and RLENGTH is -1. The result equals RSTART. */
2298        case B_ma:
2299                re = as_regex(an[1], &sreg);
2300                n = regexec(re, as[0], 1, pmatch, 0);
2301                if (n == 0) {
2302                        pmatch[0].rm_so++;
2303                        pmatch[0].rm_eo++;
2304                } else {
2305                        pmatch[0].rm_so = 0;
2306                        pmatch[0].rm_eo = -1;
2307                }
2308                setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2309                setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2310                setvar_i(res, pmatch[0].rm_so);
2311                if (re == &sreg)
2312                        regfree(re);
2313                break;
2314
        /* substitution with subexpression support: match number taken from
         * av[2], source av[3]; the new string is stored in res */
2315        case B_ge:
2316                awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2317                break;
2318
        /* replace all matches in av[2], in place; result is awk_sub()'s return value */
2319        case B_gs:
2320                setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2321                break;
2322
        /* replace the first match in av[2], in place; result is awk_sub()'s return value */
2323        case B_su:
2324                setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2325                break;
2326        }
2327
2328        nvfree(tv);
2329        return res;
2330#undef tspl
2331}
2332
2333/*
2334 * Evaluate node - the heart of the program. Supplied with subtree
2335 * and place where to store result. returns ptr to result.
2336 */
/* XC() shifts an opcode-class value right by 8 bits. evaluate() applies it
 * both to the switch operand (opinfo & OPCLSMASK) and to every case label.
 * NOTE(review): presumably this keeps the case constants small - confirm. */
2337#define XC(n) ((n) >> 8)
2338
2339static var *evaluate(node *op, var *res)
2340{
2341/* This procedure is recursive so we should count every byte */
2342#define fnargs (G.evaluate__fnargs)
2343/* seed is initialized to 1 */
2344#define seed   (G.evaluate__seed)
2345#define sreg   (G.evaluate__sreg)
2346
2347        var *v1;
2348
2349        if (!op)
2350                return setvar_s(res, NULL);
2351
2352        debug_printf_eval("entered %s()\n", __func__);
2353
2354        v1 = nvalloc(2);
2355
2356        while (op) {
2357                struct {
2358                        var *v;
2359                        const char *s;
2360                } L = L; /* for compiler */
2361                struct {
2362                        var *v;
2363                        const char *s;
2364                } R = R;
2365                double L_d = L_d;
2366                uint32_t opinfo;
2367                int opn;
2368                node *op1;
2369
2370                opinfo = op->info;
2371                opn = (opinfo & OPNMASK);
2372                g_lineno = op->lineno;
2373                op1 = op->l.n;
2374                debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2375
2376                /* execute inevitable things */
2377                if (opinfo & OF_RES1)
2378                        L.v = evaluate(op1, v1);
2379                if (opinfo & OF_RES2)
2380                        R.v = evaluate(op->r.n, v1+1);
2381                if (opinfo & OF_STR1) {
2382                        L.s = getvar_s(L.v);
2383                        debug_printf_eval("L.s:'%s'\n", L.s);
2384                }
2385                if (opinfo & OF_STR2) {
2386                        R.s = getvar_s(R.v);
2387                        debug_printf_eval("R.s:'%s'\n", R.s);
2388                }
2389                if (opinfo & OF_NUM1) {
2390                        L_d = getvar_i(L.v);
2391                        debug_printf_eval("L_d:%f\n", L_d);
2392                }
2393
2394                debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2395                switch (XC(opinfo & OPCLSMASK)) {
2396
2397                /* -- iterative node type -- */
2398
2399                /* test pattern */
2400                case XC( OC_TEST ):
2401                        if ((op1->info & OPCLSMASK) == OC_COMMA) {
2402                                /* it's range pattern */
2403                                if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2404                                        op->info |= OF_CHECKED;
2405                                        if (ptest(op1->r.n))
2406                                                op->info &= ~OF_CHECKED;
2407                                        op = op->a.n;
2408                                } else {
2409                                        op = op->r.n;
2410                                }
2411                        } else {
2412                                op = ptest(op1) ? op->a.n : op->r.n;
2413                        }
2414                        break;
2415
2416                /* just evaluate an expression, also used as unconditional jump */
2417                case XC( OC_EXEC ):
2418                        break;
2419
2420                /* branch, used in if-else and various loops */
2421                case XC( OC_BR ):
2422                        op = istrue(L.v) ? op->a.n : op->r.n;
2423                        break;
2424
2425                /* initialize for-in loop */
2426                case XC( OC_WALKINIT ):
2427                        hashwalk_init(L.v, iamarray(R.v));
2428                        break;
2429
2430                /* get next array item */
2431                case XC( OC_WALKNEXT ):
2432                        op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2433                        break;
2434
2435                case XC( OC_PRINT ):
2436                case XC( OC_PRINTF ): {
2437                        FILE *F = stdout;
2438
2439                        if (op->r.n) {
2440                                rstream *rsm = newfile(R.s);
2441                                if (!rsm->F) {
2442                                        if (opn == '|') {
2443                                                rsm->F = popen(R.s, "w");
2444                                                if (rsm->F == NULL)
2445                                                        bb_perror_msg_and_die("popen");
2446                                                rsm->is_pipe = 1;
2447                                        } else {
2448                                                rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2449                                        }
2450                                }
2451                                F = rsm->F;
2452                        }
2453
2454                        if ((opinfo & OPCLSMASK) == OC_PRINT) {
2455                                if (!op1) {
2456                                        fputs(getvar_s(intvar[F0]), F);
2457                                } else {
2458                                        while (op1) {
2459                                                var *v = evaluate(nextarg(&op1), v1);
2460                                                if (v->type & VF_NUMBER) {
2461                                                        fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2462                                                                        getvar_i(v), TRUE);
2463                                                        fputs(g_buf, F);
2464                                                } else {
2465                                                        fputs(getvar_s(v), F);
2466                                                }
2467
2468                                                if (op1)
2469                                                        fputs(getvar_s(intvar[OFS]), F);
2470                                        }
2471                                }
2472                                fputs(getvar_s(intvar[ORS]), F);
2473
2474                        } else {        /* OC_PRINTF */
2475                                char *s = awk_printf(op1);
2476                                fputs(s, F);
2477                                free(s);
2478                        }
2479                        fflush(F);
2480                        break;
2481                }
2482
2483                case XC( OC_DELETE ): {
2484                        uint32_t info = op1->info & OPCLSMASK;
2485                        var *v;
2486
2487                        if (info == OC_VAR) {
2488                                v = op1->l.v;
2489                        } else if (info == OC_FNARG) {
2490                                v = &fnargs[op1->l.aidx];
2491                        } else {
2492                                syntax_error(EMSG_NOT_ARRAY);
2493                        }
2494
2495                        if (op1->r.n) {
2496                                const char *s;
2497                                clrvar(L.v);
2498                                s = getvar_s(evaluate(op1->r.n, v1));
2499                                hash_remove(iamarray(v), s);
2500                        } else {
2501                                clear_array(iamarray(v));
2502                        }
2503                        break;
2504                }
2505
2506                case XC( OC_NEWSOURCE ):
2507                        g_progname = op->l.new_progname;
2508                        break;
2509
2510                case XC( OC_RETURN ):
2511                        copyvar(res, L.v);
2512                        break;
2513
2514                case XC( OC_NEXTFILE ):
2515                        nextfile = TRUE;
2516                case XC( OC_NEXT ):
2517                        nextrec = TRUE;
2518                case XC( OC_DONE ):
2519                        clrvar(res);
2520                        break;
2521
2522                case XC( OC_EXIT ):
2523                        awk_exit(L_d);
2524
2525                /* -- recursive node type -- */
2526
2527                case XC( OC_VAR ):
2528                        L.v = op->l.v;
2529                        if (L.v == intvar[NF])
2530                                split_f0();
2531                        goto v_cont;
2532
2533                case XC( OC_FNARG ):
2534                        L.v = &fnargs[op->l.aidx];
2535 v_cont:
2536                        res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2537                        break;
2538
2539                case XC( OC_IN ):
2540                        setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2541                        break;
2542
2543                case XC( OC_REGEXP ):
2544                        op1 = op;
2545                        L.s = getvar_s(intvar[F0]);
2546                        goto re_cont;
2547
2548                case XC( OC_MATCH ):
2549                        op1 = op->r.n;
2550 re_cont:
2551                        {
2552                                regex_t *re = as_regex(op1, &sreg);
2553                                int i = regexec(re, L.s, 0, NULL, 0);
2554                                if (re == &sreg)
2555                                        regfree(re);
2556                                setvar_i(res, (i == 0) ^ (opn == '!'));
2557                        }
2558                        break;
2559
2560                case XC( OC_MOVE ):
2561                        debug_printf_eval("MOVE\n");
2562                        /* if source is a temporary string, just relink it to dest */
2563//Disabled: if R.v is numeric but happens to have cached R.v->string,
2564//then L.v ends up being a string, which is wrong
2565//                      if (R.v == v1+1 && R.v->string) {
2566//                              res = setvar_p(L.v, R.v->string);
2567//                              R.v->string = NULL;
2568//                      } else {
2569                                res = copyvar(L.v, R.v);
2570//                      }
2571                        break;
2572
2573                case XC( OC_TERNARY ):
2574                        if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2575                                syntax_error(EMSG_POSSIBLE_ERROR);
2576                        res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2577                        break;
2578
2579                case XC( OC_FUNC ): {
2580                        var *vbeg, *v;
2581                        const char *sv_progname;
2582
2583                        if (!op->r.f->body.first)
2584                                syntax_error(EMSG_UNDEF_FUNC);
2585
2586                        vbeg = v = nvalloc(op->r.f->nargs + 1);
2587                        while (op1) {
2588                                var *arg = evaluate(nextarg(&op1), v1);
2589                                copyvar(v, arg);
2590                                v->type |= VF_CHILD;
2591                                v->x.parent = arg;
2592                                if (++v - vbeg >= op->r.f->nargs)
2593                                        break;
2594                        }
2595
2596                        v = fnargs;
2597                        fnargs = vbeg;
2598                        sv_progname = g_progname;
2599
2600                        res = evaluate(op->r.f->body.first, res);
2601
2602                        g_progname = sv_progname;
2603                        nvfree(fnargs);
2604                        fnargs = v;
2605
2606                        break;
2607                }
2608
2609                case XC( OC_GETLINE ):
2610                case XC( OC_PGETLINE ): {
2611                        rstream *rsm;
2612                        int i;
2613
2614                        if (op1) {
2615                                rsm = newfile(L.s);
2616                                if (!rsm->F) {
2617                                        if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2618                                                rsm->F = popen(L.s, "r");
2619                                                rsm->is_pipe = TRUE;
2620                                        } else {
2621                                                rsm->F = fopen_for_read(L.s);  /* not xfopen! */
2622                                        }
2623                                }
2624                        } else {
2625                                if (!iF)
2626                                        iF = next_input_file();
2627                                rsm = iF;
2628                        }
2629
2630                        if (!rsm->F) {
2631                                setvar_i(intvar[ERRNO], errno);
2632                                setvar_i(res, -1);
2633                                break;
2634                        }
2635
2636                        if (!op->r.n)
2637                                R.v = intvar[F0];
2638
2639                        i = awk_getline(rsm, R.v);
2640                        if (i > 0 && !op1) {
2641                                incvar(intvar[FNR]);
2642                                incvar(intvar[NR]);
2643                        }
2644                        setvar_i(res, i);
2645                        break;
2646                }
2647
2648                /* simple builtins */
2649                case XC( OC_FBLTIN ): {
2650                        double R_d = R_d; /* for compiler */
2651
2652                        switch (opn) {
2653                        case F_in:
2654                                R_d = (int)L_d;
2655                                break;
2656
2657                        case F_rn:
2658                                R_d = (double)rand() / (double)RAND_MAX;
2659                                break;
2660
2661                        case F_co:
2662                                if (ENABLE_FEATURE_AWK_LIBM) {
2663                                        R_d = cos(L_d);
2664                                        break;
2665                                }
2666
2667                        case F_ex:
2668                                if (ENABLE_FEATURE_AWK_LIBM) {
2669                                        R_d = exp(L_d);
2670                                        break;
2671                                }
2672
2673                        case F_lg:
2674                                if (ENABLE_FEATURE_AWK_LIBM) {
2675                                        R_d = log(L_d);
2676                                        break;
2677                                }
2678
2679                        case F_si:
2680                                if (ENABLE_FEATURE_AWK_LIBM) {
2681                                        R_d = sin(L_d);
2682                                        break;
2683                                }
2684
2685                        case F_sq:
2686                                if (ENABLE_FEATURE_AWK_LIBM) {
2687                                        R_d = sqrt(L_d);
2688                                        break;
2689                                }
2690
2691                                syntax_error(EMSG_NO_MATH);
2692                                break;
2693
2694                        case F_sr:
2695                                R_d = (double)seed;
2696                                seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2697                                srand(seed);
2698                                break;
2699
2700                        case F_ti:
2701                                R_d = time(NULL);
2702                                break;
2703
2704                        case F_le:
2705                                if (!op1)
2706                                        L.s = getvar_s(intvar[F0]);
2707                                R_d = strlen(L.s);
2708                                break;
2709
2710                        case F_sy:
2711                                fflush_all();
2712                                R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2713                                                ? (system(L.s) >> 8) : 0;
2714                                break;
2715
2716                        case F_ff:
2717                                if (!op1) {
2718                                        fflush(stdout);
2719                                } else if (L.s && *L.s) {
2720                                        rstream *rsm = newfile(L.s);
2721                                        fflush(rsm->F);
2722                                } else {
2723                                        fflush_all();
2724                                }
2725                                break;
2726
2727                        case F_cl: {
2728                                rstream *rsm;
2729                                int err = 0;
2730                                rsm = (rstream *)hash_search(fdhash, L.s);
2731                                debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2732                                if (rsm) {
2733                                        debug_printf_eval("OC_FBLTIN F_cl "
2734                                                "rsm->is_pipe:%d, ->F:%p\n",
2735                                                rsm->is_pipe, rsm->F);
2736                                        /* Can be NULL if open failed. Example:
2737                                         * getline line <"doesnt_exist";
2738                                         * close("doesnt_exist"); <--- here rsm->F is NULL
2739                                         */
2740                                        if (rsm->F)
2741                                                err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2742                                        free(rsm->buffer);
2743                                        hash_remove(fdhash, L.s);
2744                                }
2745                                if (err)
2746                                        setvar_i(intvar[ERRNO], errno);
2747                                R_d = (double)err;
2748                                break;
2749                        }
2750                        } /* switch */
2751                        setvar_i(res, R_d);
2752                        break;
2753                }
2754
2755                case XC( OC_BUILTIN ):
2756                        res = exec_builtin(op, res);
2757                        break;
2758
2759                case XC( OC_SPRINTF ):
2760                        setvar_p(res, awk_printf(op1));
2761                        break;
2762
2763                case XC( OC_UNARY ): {
2764                        double Ld, R_d;
2765
2766                        Ld = R_d = getvar_i(R.v);
2767                        switch (opn) {
2768                        case 'P':
2769                                Ld = ++R_d;
2770                                goto r_op_change;
2771                        case 'p':
2772                                R_d++;
2773                                goto r_op_change;
2774                        case 'M':
2775                                Ld = --R_d;
2776                                goto r_op_change;
2777                        case 'm':
2778                                R_d--;
2779 r_op_change:
2780                                setvar_i(R.v, R_d);
2781                                break;
2782                        case '!':
2783                                Ld = !istrue(R.v);
2784                                break;
2785                        case '-':
2786                                Ld = -R_d;
2787                                break;
2788                        }
2789                        setvar_i(res, Ld);
2790                        break;
2791                }
2792
2793                case XC( OC_FIELD ): {
2794                        int i = (int)getvar_i(R.v);
2795                        if (i == 0) {
2796                                res = intvar[F0];
2797                        } else {
2798                                split_f0();
2799                                if (i > nfields)
2800                                        fsrealloc(i);
2801                                res = &Fields[i - 1];
2802                        }
2803                        break;
2804                }
2805
2806                /* concatenation (" ") and index joining (",") */
2807                case XC( OC_CONCAT ):
2808                case XC( OC_COMMA ): {
2809                        const char *sep = "";
2810                        if ((opinfo & OPCLSMASK) == OC_COMMA)
2811                                sep = getvar_s(intvar[SUBSEP]);
2812                        setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2813                        break;
2814                }
2815
2816                case XC( OC_LAND ):
2817                        setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2818                        break;
2819
2820                case XC( OC_LOR ):
2821                        setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2822                        break;
2823
2824                case XC( OC_BINARY ):
2825                case XC( OC_REPLACE ): {
2826                        double R_d = getvar_i(R.v);
2827                        debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2828                        switch (opn) {
2829                        case '+':
2830                                L_d += R_d;
2831                                break;
2832                        case '-':
2833                                L_d -= R_d;
2834                                break;
2835                        case '*':
2836                                L_d *= R_d;
2837                                break;
2838                        case '/':
2839                                if (R_d == 0)
2840                                        syntax_error(EMSG_DIV_BY_ZERO);
2841                                L_d /= R_d;
2842                                break;
2843                        case '&':
2844                                if (ENABLE_FEATURE_AWK_LIBM)
2845                                        L_d = pow(L_d, R_d);
2846                                else
2847                                        syntax_error(EMSG_NO_MATH);
2848                                break;
2849                        case '%':
2850                                if (R_d == 0)
2851                                        syntax_error(EMSG_DIV_BY_ZERO);
2852                                L_d -= (int)(L_d / R_d) * R_d;
2853                                break;
2854                        }
2855                        debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2856                        res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2857                        break;
2858                }
2859
2860                case XC( OC_COMPARE ): {
2861                        int i = i; /* for compiler */
2862                        double Ld;
2863
2864                        if (is_numeric(L.v) && is_numeric(R.v)) {
2865                                Ld = getvar_i(L.v) - getvar_i(R.v);
2866                        } else {
2867                                const char *l = getvar_s(L.v);
2868                                const char *r = getvar_s(R.v);
2869                                Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2870                        }
2871                        switch (opn & 0xfe) {
2872                        case 0:
2873                                i = (Ld > 0);
2874                                break;
2875                        case 2:
2876                                i = (Ld >= 0);
2877                                break;
2878                        case 4:
2879                                i = (Ld == 0);
2880                                break;
2881                        }
2882                        setvar_i(res, (i == 0) ^ (opn & 1));
2883                        break;
2884                }
2885
2886                default:
2887                        syntax_error(EMSG_POSSIBLE_ERROR);
2888                }
2889                if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2890                        op = op->a.n;
2891                if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2892                        break;
2893                if (nextrec)
2894                        break;
2895        } /* while (op) */
2896
2897        nvfree(v1);
2898        debug_printf_eval("returning from %s(): %p\n", __func__, res);
2899        return res;
2900#undef fnargs
2901#undef seed
2902#undef sreg
2903}
2904
2905
2906/* -------- main & co. -------- */
2907
2908static int awk_exit(int r)
2909{
2910        var tv;
2911        unsigned i;
2912        hash_item *hi;
2913
2914        zero_out_var(&tv);
2915
2916        if (!exiting) {
2917                exiting = TRUE;
2918                nextrec = FALSE;
2919                evaluate(endseq.first, &tv);
2920        }
2921
2922        /* waiting for children */
2923        for (i = 0; i < fdhash->csize; i++) {
2924                hi = fdhash->items[i];
2925                while (hi) {
2926                        if (hi->data.rs.F && hi->data.rs.is_pipe)
2927                                pclose(hi->data.rs.F);
2928                        hi = hi->next;
2929                }
2930        }
2931
2932        exit(r);
2933}
2934
2935/* if expr looks like "var=value", perform assignment and return 1,
2936 * otherwise return 0 */
2937static int is_assignment(const char *expr)
2938{
2939        char *exprc, *val, *s, *s1;
2940
2941        if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
2942                return FALSE;
2943        }
2944
2945        exprc = xstrdup(expr);
2946        val = exprc + (val - expr);
2947        *val++ = '\0';
2948
2949        s = s1 = val;
2950        while ((*s1 = nextchar(&s)) != '\0')
2951                s1++;
2952
2953        setvar_u(newvar(exprc), val);
2954        free(exprc);
2955        return TRUE;
2956}
2957
2958/* switch to next input file */
2959static rstream *next_input_file(void)
2960{
2961#define rsm          (G.next_input_file__rsm)
2962#define files_happen (G.next_input_file__files_happen)
2963
2964        FILE *F = NULL;
2965        const char *fname, *ind;
2966
2967        if (rsm.F)
2968                fclose(rsm.F);
2969        rsm.F = NULL;
2970        rsm.pos = rsm.adv = 0;
2971
2972        do {
2973                if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2974                        if (files_happen)
2975                                return NULL;
2976                        fname = "-";
2977                        F = stdin;
2978                } else {
2979                        ind = getvar_s(incvar(intvar[ARGIND]));
2980                        fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2981                        if (fname && *fname && !is_assignment(fname))
2982                                F = xfopen_stdin(fname);
2983                }
2984        } while (!F);
2985
2986        files_happen = TRUE;
2987        setvar_s(intvar[FILENAME], fname);
2988        rsm.F = F;
2989        return &rsm;
2990#undef rsm
2991#undef files_happen
2992}
2993
2994int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2995int awk_main(int argc, char **argv)
2996{
2997        unsigned opt;
2998        char *opt_F, *opt_W;
2999        llist_t *list_v = NULL;
3000        llist_t *list_f = NULL;
3001        int i, j;
3002        var *v;
3003        var tv;
3004        char **envp;
3005        char *vnames = (char *)vNames; /* cheat */
3006        char *vvalues = (char *)vValues;
3007
3008        INIT_G();
3009
3010        /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3011         * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3012        if (ENABLE_LOCALE_SUPPORT)
3013                setlocale(LC_NUMERIC, "C");
3014
3015        zero_out_var(&tv);
3016
3017        /* allocate global buffer */
3018        g_buf = xmalloc(MAXVARFMT + 1);
3019
3020        vhash = hash_init();
3021        ahash = hash_init();
3022        fdhash = hash_init();
3023        fnhash = hash_init();
3024
3025        /* initialize variables */
3026        for (i = 0; *vnames; i++) {
3027                intvar[i] = v = newvar(nextword(&vnames));
3028                if (*vvalues != '\377')
3029                        setvar_s(v, nextword(&vvalues));
3030                else
3031                        setvar_i(v, 0);
3032
3033                if (*vnames == '*') {
3034                        v->type |= VF_SPECIAL;
3035                        vnames++;
3036                }
3037        }
3038
3039        handle_special(intvar[FS]);
3040        handle_special(intvar[RS]);
3041
3042        newfile("/dev/stdin")->F = stdin;
3043        newfile("/dev/stdout")->F = stdout;
3044        newfile("/dev/stderr")->F = stderr;
3045
3046        /* Huh, people report that sometimes environ is NULL. Oh well. */
3047        if (environ) for (envp = environ; *envp; envp++) {
3048                /* environ is writable, thus we don't strdup it needlessly */
3049                char *s = *envp;
3050                char *s1 = strchr(s, '=');
3051                if (s1) {
3052                        *s1 = '\0';
3053                        /* Both findvar and setvar_u take const char*
3054                         * as 2nd arg -> environment is not trashed */
3055                        setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3056                        *s1 = '=';
3057                }
3058        }
3059        opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3060        opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3061        argv += optind;
3062        argc -= optind;
3063        if (opt & 0x1)
3064                setvar_s(intvar[FS], opt_F); // -F
3065        while (list_v) { /* -v */
3066                if (!is_assignment(llist_pop(&list_v)))
3067                        bb_show_usage();
3068        }
3069        if (list_f) { /* -f */
3070                do {
3071                        char *s = NULL;
3072                        FILE *from_file;
3073
3074                        g_progname = llist_pop(&list_f);
3075                        from_file = xfopen_stdin(g_progname);
3076                        /* one byte is reserved for some trick in next_token */
3077                        for (i = j = 1; j > 0; i += j) {
3078                                s = xrealloc(s, i + 4096);
3079                                j = fread(s + i, 1, 4094, from_file);
3080                        }
3081                        s[i] = '\0';
3082                        fclose(from_file);
3083                        parse_program(s + 1);
3084                        free(s);
3085                } while (list_f);
3086                argc++;
3087        } else { // no -f: take program from 1st parameter
3088                if (!argc)
3089                        bb_show_usage();
3090                g_progname = "cmd. line";
3091                parse_program(*argv++);
3092        }
3093        if (opt & 0x8) // -W
3094                bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3095
3096        /* fill in ARGV array */
3097        setvar_i(intvar[ARGC], argc);
3098        setari_u(intvar[ARGV], 0, "awk");
3099        i = 0;
3100        while (*argv)
3101                setari_u(intvar[ARGV], ++i, *argv++);
3102
3103        evaluate(beginseq.first, &tv);
3104        if (!mainseq.first && !endseq.first)
3105                awk_exit(EXIT_SUCCESS);
3106
3107        /* input file could already be opened in BEGIN block */
3108        if (!iF)
3109                iF = next_input_file();
3110
3111        /* passing through input files */
3112        while (iF) {
3113                nextfile = FALSE;
3114                setvar_i(intvar[FNR], 0);
3115
3116                while ((i = awk_getline(iF, intvar[F0])) > 0) {
3117                        nextrec = FALSE;
3118                        incvar(intvar[NR]);
3119                        incvar(intvar[FNR]);
3120                        evaluate(mainseq.first, &tv);
3121
3122                        if (nextfile)
3123                                break;
3124                }
3125
3126                if (i < 0)
3127                        syntax_error(strerror(errno));
3128
3129                iF = next_input_file();
3130        }
3131
3132        awk_exit(EXIT_SUCCESS);
3133        /*return 0;*/
3134}
3135