busybox/editors/awk.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * awk implementation for busybox
   4 *
   5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
   6 *
   7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
   8 */
   9
  10#include "libbb.h"
  11#include "xregex.h"
  12#include <math.h>
  13
  14/* This is a NOEXEC applet. Be very careful! */
  15
  16
  /* buffer size limit handed to fmt_num() when a number is formatted (see getvar_s) */
  #define MAXVARFMT  240
  /* smallest number of var slots allocated for one nvblock (see nvalloc) */
  #define MINNVBLOCK 64
  19
  /* var::type flag bits */
  #define VF_NUMBER       (1 << 0)        /* primary type is number */
  #define VF_ARRAY        (1 << 1)        /* it's an array */

  #define VF_CACHED       (1 << 8)        /* num/str value has cached str/num eq */
  #define VF_USER         (1 << 9)        /* user input (may be numeric string) */
  #define VF_SPECIAL      (1 << 10)       /* requires extra handling when changed */
  #define VF_WALK         (1 << 11)       /* variable has alloc'd x.walker list */
  #define VF_FSTR         (1 << 12)       /* var::string points to fstring buffer */
  #define VF_CHILD        (1 << 13)       /* function arg; x.parent points to source */
  #define VF_DIRTY        (1 << 14)       /* variable was set explicitly */

  /* flags that survive a value change (clrvar keeps exactly these) */
  #define VF_DONTTOUCH \
          (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
  34
  /* Variable: flags plus a numeric and a string representation */
  struct var_s {
          unsigned type;                  /* VF_xxx flags */
          double number;
          char *string;
          union {
                  int aidx;               /* func arg idx (for compilation stage) */
                  struct xhash_s *array;  /* array ptr */
                  struct var_s *parent;   /* for func args, ptr to actual parameter */
                  char **walker;          /* list of array elements (for..in) */
          } x;
  };
  typedef struct var_s var;
  47
  /* Node chain (pattern-action chain, BEGIN, END, function bodies):
   * pointers to its first and last node plus the program name */
  struct chain_s {
          struct node_s *first;
          struct node_s *last;
          const char *programname;
  };
  typedef struct chain_s chain;
  54
  55/* Function */
  56typedef struct func_s {
  57        unsigned nargs;
  58        struct chain_s body;
  59} func;
  60
  61/* I/O stream */
  62typedef struct rstream_s {
  63        FILE *F;
  64        char *buffer;
  65        int adv;
  66        int size;
  67        int pos;
  68        smallint is_pipe;
  69} rstream;
  70
  71typedef struct hash_item_s {
  72        union {
  73                struct var_s v;         /* variable/array hash */
  74                struct rstream_s rs;    /* redirect streams hash */
  75                struct func_s f;        /* functions hash */
  76        } data;
  77        struct hash_item_s *next;       /* next in chain */
  78        char name[1];                   /* really it's longer */
  79} hash_item;
  80
  /* Hash table header */
  struct xhash_s {
          unsigned nel;                   /* num of elements */
          unsigned csize;                 /* current hash size */
          unsigned nprime;                /* next hash size in PRIMES[] */
          unsigned glen;                  /* summary length of item names */
          struct hash_item_s **items;     /* csize bucket heads */
  };
  typedef struct xhash_s xhash;
  88
  89/* Tree node */
  90typedef struct node_s {
  91        uint32_t info;
  92        unsigned lineno;
  93        union {
  94                struct node_s *n;
  95                var *v;
  96                int i;
  97                char *s;
  98                regex_t *re;
  99        } l;
 100        union {
 101                struct node_s *n;
 102                regex_t *ire;
 103                func *f;
 104                int argno;
 105        } r;
 106        union {
 107                struct node_s *n;
 108        } a;
 109} node;
 110
 111/* Block of temporary variables */
 112typedef struct nvblock_s {
 113        int size;
 114        var *pos;
 115        struct nvblock_s *prev;
 116        struct nvblock_s *next;
 117        var nv[0];
 118} nvblock;
 119
 120typedef struct tsplitter_s {
 121        node n;
 122        regex_t re[2];
 123} tsplitter;
 124
 /* simple token classes: exactly one bit each */
 /* Order and hex values are very important!!!  See next_token() */
 #define TC_SEQSTART     0x00000001      /* ( */
 #define TC_SEQTERM      0x00000002      /* ) */
 #define TC_REGEXP       0x00000004      /* /.../ */
 #define TC_OUTRDR       0x00000008      /* | > >> */
 #define TC_UOPPOST      0x00000010      /* unary postfix operator */
 #define TC_UOPPRE1      0x00000020      /* unary prefix operator */
 #define TC_BINOPX       0x00000040      /* two-opnd operator */
 #define TC_IN           0x00000080
 #define TC_COMMA        0x00000100
 #define TC_PIPE         0x00000200      /* input redirection pipe */
 #define TC_UOPPRE2      0x00000400      /* unary prefix operator */
 #define TC_ARRTERM      0x00000800      /* ] */
 #define TC_GRPSTART     0x00001000      /* { */
 #define TC_GRPTERM      0x00002000      /* } */
 #define TC_SEMICOL      0x00004000
 #define TC_NEWLINE      0x00008000
 #define TC_STATX        0x00010000      /* ctl statement (for, next...) */
 #define TC_WHILE        0x00020000
 #define TC_ELSE         0x00040000
 #define TC_BUILTIN      0x00080000
 #define TC_GETLINE      0x00100000
 #define TC_FUNCDECL     0x00200000      /* `function' `func' */
 #define TC_BEGIN        0x00400000
 #define TC_END          0x00800000
 #define TC_EOF          0x01000000
 #define TC_VARIABLE     0x02000000
 #define TC_ARRAY        0x04000000
 #define TC_FUNCTION     0x08000000
 #define TC_STRING       0x10000000
 #define TC_NUMBER       0x20000000

 /* either kind of unary prefix operator */
 #define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)

 /* combined token classes */
 #define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
 #define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
 #define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | TC_BUILTIN \
                         | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

 #define TC_STATEMNT     (TC_STATX | TC_WHILE)
 #define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)

 /* word tokens, cannot mean something else if not expected */
 #define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
                         | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

 /* discard newlines after these */
 #define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
                         | TC_BINOP | TC_OPTERM)

 /* what can expression begin with */
 #define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
 /* what can group begin with */
 #define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
 /* operator is inserted between them */
 #define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
                         | TC_STRING | TC_NUMBER | TC_UOPPOST)
 #define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
 187
 /* operator flags, one bit each */
 #define OF_RES1         (1 << 16)
 #define OF_RES2         (1 << 17)
 #define OF_STR1         (1 << 18)
 #define OF_STR2         (1 << 19)
 #define OF_NUM1         (1 << 20)
 #define OF_CHECKED      (1 << 21)

 /* combined operator flags */
 #define xx              0
 #define xV              OF_RES2
 #define xS              (OF_RES2 | OF_STR2)
 #define Vx              OF_RES1
 #define VV              (OF_RES1 | OF_RES2)
 #define Nx              (OF_RES1 | OF_NUM1)
 #define NV              (OF_RES1 | OF_NUM1 | OF_RES2)
 #define Sx              (OF_RES1 | OF_STR1)
 #define SV              (OF_RES1 | OF_STR1 | OF_RES2)
 #define SS              (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
 206
 /* masks for the operation class byte and the low operation-number bits
  * of an info word */
 #define OPCLSMASK 0xFF00
 #define OPNMASK   0x007F

 /* Operator priority is stored in the highest byte (even: r->l, odd: l->r
  * grouping).
  * For builtins that byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
  * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
  */
 /* The argument is parenthesized and widened to uint32_t before shifting:
  * values with the top bit set (e.g. P(0x83)) would otherwise shift a
  * signed int into its sign bit, which is undefined behavior. */
 #define P(x)      ((uint32_t)(x) << 24)
 #define PRIMASK   0x7F000000
 #define PRIMASK2  0x7E000000
 217
 /* Operation classes (compared against info & OPCLSMASK) */

 #define SHIFT_TIL_THIS  0x0600
 #define RECUR_FROM_THIS 0x1000

 enum {
         OC_DELETE    = 0x0100,
         OC_EXEC      = 0x0200,
         OC_NEWSOURCE = 0x0300,
         OC_PRINT     = 0x0400,
         OC_PRINTF    = 0x0500,
         OC_WALKINIT  = 0x0600,

         OC_BR        = 0x0700,
         OC_BREAK     = 0x0800,
         OC_CONTINUE  = 0x0900,
         OC_EXIT      = 0x0a00,
         OC_NEXT      = 0x0b00,
         OC_NEXTFILE  = 0x0c00,
         OC_TEST      = 0x0d00,
         OC_WALKNEXT  = 0x0e00,

         OC_BINARY    = 0x1000,
         OC_BUILTIN   = 0x1100,
         OC_COLON     = 0x1200,
         OC_COMMA     = 0x1300,
         OC_COMPARE   = 0x1400,
         OC_CONCAT    = 0x1500,
         OC_FBLTIN    = 0x1600,
         OC_FIELD     = 0x1700,
         OC_FNARG     = 0x1800,
         OC_FUNC      = 0x1900,
         OC_GETLINE   = 0x1a00,
         OC_IN        = 0x1b00,
         OC_LAND      = 0x1c00,
         OC_LOR       = 0x1d00,
         OC_MATCH     = 0x1e00,
         OC_MOVE      = 0x1f00,
         OC_PGETLINE  = 0x2000,
         OC_REGEXP    = 0x2100,
         OC_REPLACE   = 0x2200,
         OC_RETURN    = 0x2300,
         OC_SPRINTF   = 0x2400,
         OC_TERNARY   = 0x2500,
         OC_UNARY     = 0x2600,
         OC_VAR       = 0x2700,
         OC_DONE      = 0x2800,

         ST_IF        = 0x3000,
         ST_DO        = 0x3100,
         ST_FOR       = 0x3200,
         ST_WHILE     = 0x3300
 };
 244
 /* simple builtins (OC_FBLTIN); the awk name each code is paired with
  * in tokenlist[]/tokeninfo[] is given on the right */
 enum {
         F_in,   /* int */
         F_rn,   /* rand */
         F_co,   /* cos */
         F_ex,   /* exp */
         F_lg,   /* log */
         F_si,   /* sin */
         F_sq,   /* sqrt */
         F_sr,   /* srand */
         F_ti,   /* systime */
         F_le,   /* length */
         F_sy,   /* system */
         F_ff,   /* fflush */
         F_cl    /* close */
 };

 /* builtins (OC_BUILTIN) */
 enum {
         B_a2,   /* atan2 */
         B_ix,   /* index */
         B_ma,   /* match */
         B_sp,   /* split */
         B_ss,   /* substr */
         B_ti,   /* strftime */
         B_lo,   /* tolower */
         B_up,   /* toupper */
         B_ge,   /* gensub */
         B_gs,   /* gsub */
         B_su,   /* sub */
         B_an,   /* and */
         B_co,   /* compl */
         B_ls,   /* lshift */
         B_or,   /* or */
         B_rs,   /* rshift */
         B_xo,   /* xor */
 };
 257
 258/* tokens and their corresponding info values */
 /* Layout of tokenlist[]: every token is one length byte (written as an
  * octal escape, e.g. "\1", "\2", "\10" = 8) followed by that many
  * characters of token text. NTC closes the group of tokens belonging to
  * one token class; the groups appear in the same order as the TC_xxx
  * bits (TC_SEQSTART first, TC_END last). The table ends with a zero
  * length byte. tokeninfo[] holds one info word per token, in the same
  * order. */
 259
 260#define NTC     "\377"  /* switch to next token class (tc<<1) */
 /* same marker as NTC, as a character constant instead of a string */
 261#define NTCC    '\377'
 262
 /* shorthand used to keep the tokeninfo[] rows short */
 263#define OC_B    OC_BUILTIN
 264
 265static const char tokenlist[] ALIGN1 =
 266        "\1("       NTC
 267        "\1)"       NTC
 268        "\1/"       NTC                                 /* REGEXP */
 269        "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
 270        "\2++"      "\2--"      NTC                     /* UOPPOST */
 271        "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
 272        "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
 273        "\2*="      "\2/="      "\2%="      "\2^="
 274        "\1+"       "\1-"       "\3**="     "\2**"
 275        "\1/"       "\1%"       "\1^"       "\1*"
 276        "\2!="      "\2>="      "\2<="      "\1>"
 277        "\1<"       "\2!~"      "\1~"       "\2&&"
 278        "\2||"      "\1?"       "\1:"       NTC
 279        "\2in"      NTC
 280        "\1,"       NTC
 281        "\1|"       NTC
 282        "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
 283        "\1]"       NTC
 284        "\1{"       NTC
 285        "\1}"       NTC
 286        "\1;"       NTC
 287        "\1\n"      NTC
 288        "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
 289        "\10continue"           "\6delete"  "\5print"
 290        "\6printf"  "\4next"    "\10nextfile"
 291        "\6return"  "\4exit"    NTC
 292        "\5while"   NTC
 293        "\4else"    NTC
 294
 295        "\3and"     "\5compl"   "\6lshift"  "\2or"
 296        "\6rshift"  "\3xor"
 297        "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
 298        "\3cos"     "\3exp"     "\3int"     "\3log"
 299        "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
 300        "\6gensub"  "\4gsub"    "\5index"   "\6length"
 301        "\5match"   "\5split"   "\7sprintf" "\3sub"
 302        "\6substr"  "\7systime" "\10strftime"
 303        "\7tolower" "\7toupper" NTC
 304        "\7getline" NTC
 305        "\4func"    "\10function"   NTC
 306        "\5BEGIN"   NTC
 307        "\3END"     "\0"
 308        ;
 309
 /* Info word for every token of tokenlist[], in exactly the same order
  * (the NTC separators have no entry here). A word combines an OC_xxx/ST_xxx
  * operation class, OF_xxx operand flags (xV, NV, Sx, ...), a P() value in
  * the top byte and a small operation number or character in the low bits;
  * tokens that need no info use 0. Keep both tables in sync when editing. */
 310static const uint32_t tokeninfo[] = {
 311        0,
 312        0,
 313        OC_REGEXP,
 314        xS|'a',     xS|'w',     xS|'|',
 315        OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
 316        OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
 317            OC_FIELD|xV|P(5),
 318        OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
 319            OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
 320        OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
 321            OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
 322        OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
 323            OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
 324        OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
 325            OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
 326        OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
 327            OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
 328        OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
 329            OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
 330        OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
 331            OC_COLON|xx|P(67)|':',
 332        OC_IN|SV|P(49),
 333        OC_COMMA|SS|P(80),
 334        OC_PGETLINE|SV|P(37),
 335        OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
 336            OC_UNARY|xV|P(19)|'!',
 337        0,
 338        0,
 339        0,
 340        0,
 341        0,
 342        ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
 343        OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
 344        OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
 345        OC_RETURN|Vx,   OC_EXIT|Nx,
 346        ST_WHILE,
 347        0,
 348
 349        OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
 350        OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
 351        OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
 352        OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
 353        OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
 354        OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
 355        OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
 356        OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
 357        OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
 358        OC_GETLINE|SV|P(0),
 359        0,      0,
 360        0,
 361        0
 362};
 363
 /* internal variable names and their initial values       */
 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
 /* The enum below indexes intvar[]; its order matches vNames[] */
 enum {
         CONVFMT,
         OFMT,
         FS,
         OFS,
         ORS,
         RS,
         RT,
         FILENAME,
         SUBSEP,
         ARGIND,
         ARGC,
         ARGV,
         ERRNO,
         FNR,
         NR,
         NF,
         IGNORECASE,
         ENVIRON,
         F0,
         NUM_INTERNAL_VARS       /* number of entries above */
 };
 374
 /* NUL-separated names of the internal variables, in the order of the
  * CONVFMT..F0 enum. A '*' placed directly after a name's terminating NUL
  * flags that variable (FS, RS, NF, IGNORECASE and "$") as SPECIAL.
  * The list ends with an empty name.
  * NOTE(review): the code that walks this table is outside this chunk -
  * confirm the exact parsing there before changing the layout. */
 375static const char vNames[] ALIGN1 =
 376        "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
 377        "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
 378        "SUBSEP\0"  "ARGIND\0"  "ARGC\0"    "ARGV\0"
 379        "ERRNO\0"   "FNR\0"
 380        "NR\0"      "NF\0*"     "IGNORECASE\0*"
 381        "ENVIRON\0" "$\0*"      "\0";
 382
 /* NUL-separated initial string values; the nine entries line up with the
  * first nine names of vNames[] (CONVFMT through SUBSEP). The trailing
  * "\377" byte follows the last value.
  * NOTE(review): presumably "\377" tells the initialization code that the
  * remaining variables have no string default - confirm against the reader. */
 383static const char vValues[] ALIGN1 =
 384        "%.6g\0"    "%.6g\0"    " \0"       " \0"
 385        "\n\0"      "\n\0"      "\0"        "\0"
 386        "\034\0"
 387        "\377";
 388
 389/* hash size may grow to these values */
 390#define FIRST_PRIME 61
 391static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
 392
 393
 394/* Globals. Split in two parts so that first one is addressed
 395 * with (mostly short) negative offsets */
 396struct globals {
 397        chain beginseq, mainseq, endseq;
 398        chain *seq;
 399        node *break_ptr, *continue_ptr;
 400        rstream *iF;
 401        xhash *vhash, *ahash, *fdhash, *fnhash;
 402        const char *g_progname;
 403        int g_lineno;
 404        int nfields;
 405        int maxfields; /* used in fsrealloc() only */
 406        var *Fields;
 407        nvblock *g_cb;
 408        char *g_pos;
 409        char *g_buf;
 410        smallint icase;
 411        smallint exiting;
 412        smallint nextrec;
 413        smallint nextfile;
 414        smallint is_f0_split;
 415};
 416struct globals2 {
 417        uint32_t t_info; /* often used */
 418        uint32_t t_tclass;
 419        char *t_string;
 420        int t_lineno;
 421        int t_rollback;
 422
 423        var *intvar[NUM_INTERNAL_VARS]; /* often used */
 424
 425        /* former statics from various functions */
 426        char *split_f0__fstrings;
 427
 428        uint32_t next_token__save_tclass;
 429        uint32_t next_token__save_info;
 430        uint32_t next_token__ltclass;
 431        smallint next_token__concat_inserted;
 432
 433        smallint next_input_file__files_happen;
 434        rstream next_input_file__rsm;
 435
 436        var *evaluate__fnargs;
 437        unsigned evaluate__seed;
 438        regex_t evaluate__sreg;
 439
 440        var ptest__v;
 441
 442        tsplitter exec_builtin__tspl;
 443
 444        /* biggest and least used members go last */
 445        double t_double;
 446        tsplitter fsplitter, rsplitter;
 447};
 /* G1 is the element just before ptr_to_globals; G is the struct globals2
  * located at ptr_to_globals itself */
 #define G1 (ptr_to_globals[-1])
 #define G (*(struct globals2 *)ptr_to_globals)
 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
 /* char G1size[sizeof(G1)]; - 0x6c */
 /* char Gsize[sizeof(G)]; - 0x1cc */
 /* Trying to keep most of members accessible with short offsets: */
 /* char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */

 /* short names for the members of G1 ... */
 #define beginseq (G1.beginseq)
 #define mainseq (G1.mainseq)
 #define endseq (G1.endseq)
 #define seq (G1.seq)
 #define break_ptr (G1.break_ptr)
 #define continue_ptr (G1.continue_ptr)
 #define iF (G1.iF)
 #define vhash (G1.vhash)
 #define ahash (G1.ahash)
 #define fdhash (G1.fdhash)
 #define fnhash (G1.fnhash)
 #define g_progname (G1.g_progname)
 #define g_lineno (G1.g_lineno)
 #define nfields (G1.nfields)
 #define maxfields (G1.maxfields)
 #define Fields (G1.Fields)
 #define g_cb (G1.g_cb)
 #define g_pos (G1.g_pos)
 #define g_buf (G1.g_buf)
 #define icase (G1.icase)
 #define exiting (G1.exiting)
 #define nextrec (G1.nextrec)
 #define nextfile (G1.nextfile)
 #define is_f0_split (G1.is_f0_split)
 /* ... and for the members of G */
 #define t_info (G.t_info)
 #define t_tclass (G.t_tclass)
 #define t_string (G.t_string)
 #define t_double (G.t_double)
 #define t_lineno (G.t_lineno)
 #define t_rollback (G.t_rollback)
 #define intvar (G.intvar)
 #define fsplitter (G.fsplitter)
 #define rsplitter (G.rsplitter)
 /* Allocate struct globals and struct globals2 as one zeroed chunk and
  * point ptr_to_globals between them, so that G1 (ptr_to_globals[-1]) and
  * G (*ptr_to_globals) both land inside the allocation. Then set the two
  * members whose initial value is not zero.
  * The xzalloc() result is cast to char* before the offset is added:
  * arithmetic on void* is a GNU extension, not ISO C. */
 #define INIT_G() do { \
         SET_PTR_TO_GLOBALS((char *)xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1)); \
         G.next_token__ltclass = TC_OPTERM; \
         G.evaluate__seed = 1; \
 } while (0)
 493
 494
 495/* function prototypes */
 496static void handle_special(var *);
 497static node *parse_expr(uint32_t);
 498static void chain_group(void);
 499static var *evaluate(node *, var *);
 500static rstream *next_input_file(void);
 501static int fmt_num(char *, int, const char *, double, int);
 502static int awk_exit(int) NORETURN;
 503
 504/* ---- error handling ---- */
 505
 506static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
 507static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
 508static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
 509static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
 510static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
 511static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
 512static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
 513static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
 514static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
 515#if !ENABLE_FEATURE_AWK_LIBM
 516static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
 517#endif
 518
 519static void zero_out_var(var * vp)
 520{
 521        memset(vp, 0, sizeof(*vp));
 522}
 523
 524static void syntax_error(const char *const message) NORETURN;
 525static void syntax_error(const char *const message)
 526{
 527        bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
 528}
 529
 530/* ---- hash stuff ---- */
 531
 /* Hash a NUL-terminated name: for every character the running value is
  * multiplied by 63 and the character is added (unsigned wrap-around).
  * Callers reduce the result modulo the table size. */
 static unsigned hashidx(const char *name)
 {
         unsigned h = 0;
         const char *p;

         for (p = name; *p != '\0'; p++)
                 h = h * 63u + (unsigned)*p;
         return h;
 }
 539
 540/* create new hash */
 541static xhash *hash_init(void)
 542{
 543        xhash *newhash;
 544
 545        newhash = xzalloc(sizeof(xhash));
 546        newhash->csize = FIRST_PRIME;
 547        newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
 548
 549        return newhash;
 550}
 551
 552/* find item in hash, return ptr to data, NULL if not found */
 553static void *hash_search(xhash *hash, const char *name)
 554{
 555        hash_item *hi;
 556
 557        hi = hash->items [ hashidx(name) % hash->csize ];
 558        while (hi) {
 559                if (strcmp(hi->name, name) == 0)
 560                        return &(hi->data);
 561                hi = hi->next;
 562        }
 563        return NULL;
 564}
 565
 566/* grow hash if it becomes too big */
 567static void hash_rebuild(xhash *hash)
 568{
 569        unsigned newsize, i, idx;
 570        hash_item **newitems, *hi, *thi;
 571
 572        if (hash->nprime == ARRAY_SIZE(PRIMES))
 573                return;
 574
 575        newsize = PRIMES[hash->nprime++];
 576        newitems = xzalloc(newsize * sizeof(hash_item *));
 577
 578        for (i = 0; i < hash->csize; i++) {
 579                hi = hash->items[i];
 580                while (hi) {
 581                        thi = hi;
 582                        hi = thi->next;
 583                        idx = hashidx(thi->name) % newsize;
 584                        thi->next = newitems[idx];
 585                        newitems[idx] = thi;
 586                }
 587        }
 588
 589        free(hash->items);
 590        hash->csize = newsize;
 591        hash->items = newitems;
 592}
 593
 594/* find item in hash, add it if necessary. Return ptr to data */
 595static void *hash_find(xhash *hash, const char *name)
 596{
 597        hash_item *hi;
 598        unsigned idx;
 599        int l;
 600
 601        hi = hash_search(hash, name);
 602        if (!hi) {
 603                if (++hash->nel / hash->csize > 10)
 604                        hash_rebuild(hash);
 605
 606                l = strlen(name) + 1;
 607                hi = xzalloc(sizeof(hash_item) + l);
 608                memcpy(hi->name, name, l);
 609
 610                idx = hashidx(name) % hash->csize;
 611                hi->next = hash->items[idx];
 612                hash->items[idx] = hi;
 613                hash->glen += l;
 614        }
 615        return &(hi->data);
 616}
 617
 /* typed wrappers around hash_find(); the item is created if missing */
 #define findvar(hash, name)  ((var *)hash_find((hash), (name)))
 #define newvar(name)         findvar(vhash, (name))
 #define newfile(name)        ((rstream *)hash_find(fdhash, (name)))
 #define newfunc(name)        ((func *)hash_find(fnhash, (name)))
 622
 623static void hash_remove(xhash *hash, const char *name)
 624{
 625        hash_item *hi, **phi;
 626
 627        phi = &(hash->items[hashidx(name) % hash->csize]);
 628        while (*phi) {
 629                hi = *phi;
 630                if (strcmp(hi->name, name) == 0) {
 631                        hash->glen -= (strlen(name) + 1);
 632                        hash->nel--;
 633                        *phi = hi->next;
 634                        free(hi);
 635                        break;
 636                }
 637                phi = &(hi->next);
 638        }
 639}
 640
 641/* ------ some useful functions ------ */
 642
 643static void skip_spaces(char **s)
 644{
 645        char *p = *s;
 646
 647        while (1) {
 648                if (*p == '\\' && p[1] == '\n') {
 649                        p++;
 650                        t_lineno++;
 651                } else if (*p != ' ' && *p != '\t') {
 652                        break;
 653                }
 654                p++;
 655        }
 656        *s = p;
 657}
 658
 /* Return the string at *s and move *s just past its terminating NUL */
 static char *nextword(char **s)
 {
         char *word = *s;

         *s = word + strlen(word) + 1;
         return word;
 }
 667
 /* Fetch one character from *s and advance *s. A backslash is handed to
  * bb_process_escape_sequence(); if that returns a backslash without
  * having moved *s, the character after the backslash is returned */
 static char nextchar(char **s)
 {
         char *after;
         char ch = **s;

         (*s)++;
         if (ch != '\\')
                 return ch;

         after = *s;
         ch = bb_process_escape_sequence((const char **)s);
         if (ch == '\\' && *s == after) {
                 ch = **s;
                 (*s)++;
         }
         return ch;
 }
 678
 679static ALWAYS_INLINE int isalnum_(int c)
 680{
 681        return (isalnum(c) || c == '_');
 682}
 683
 /* Convert the number at *pp and advance *pp past the parsed text.
  * With ENABLE_DESKTOP, text starting with '0' followed by 'x'/'X' or a
  * digit goes through strtoull() with base 0; everything else uses strtod() */
 static double my_strtod(char **pp)
 {
 #if ENABLE_DESKTOP
         const char *text = *pp;

         if (text[0] == '0') {
                 char second = text[1];

                 if ((second | 0x20) == 'x' || isdigit(second))
                         return strtoull(*pp, pp, 0);
         }
 #endif
         return strtod(*pp, pp);
 }
 695
 696/* -------- working with variables (set/get/copy/etc) -------- */
 697
 698static xhash *iamarray(var *v)
 699{
 700        var *a = v;
 701
 702        while (a->type & VF_CHILD)
 703                a = a->x.parent;
 704
 705        if (!(a->type & VF_ARRAY)) {
 706                a->type |= VF_ARRAY;
 707                a->x.array = hash_init();
 708        }
 709        return a->x.array;
 710}
 711
 712static void clear_array(xhash *array)
 713{
 714        unsigned i;
 715        hash_item *hi, *thi;
 716
 717        for (i = 0; i < array->csize; i++) {
 718                hi = array->items[i];
 719                while (hi) {
 720                        thi = hi;
 721                        hi = hi->next;
 722                        free(thi->data.v.string);
 723                        free(thi);
 724                }
 725                array->items[i] = NULL;
 726        }
 727        array->glen = array->nel = 0;
 728}
 729
 730/* clear a variable */
 731static var *clrvar(var *v)
 732{
 733        if (!(v->type & VF_FSTR))
 734                free(v->string);
 735
 736        v->type &= VF_DONTTOUCH;
 737        v->type |= VF_DIRTY;
 738        v->string = NULL;
 739        return v;
 740}
 741
 742/* assign string value to variable */
 743static var *setvar_p(var *v, char *value)
 744{
 745        clrvar(v);
 746        v->string = value;
 747        handle_special(v);
 748        return v;
 749}
 750
 751/* same as setvar_p but make a copy of string */
 752static var *setvar_s(var *v, const char *value)
 753{
 754        return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
 755}
 756
 757/* same as setvar_s but set USER flag */
 758static var *setvar_u(var *v, const char *value)
 759{
 760        setvar_s(v, value);
 761        v->type |= VF_USER;
 762        return v;
 763}
 764
 765/* set array element to user string */
 766static void setari_u(var *a, int idx, const char *s)
 767{
 768        char sidx[sizeof(int)*3 + 1];
 769        var *v;
 770
 771        sprintf(sidx, "%d", idx);
 772        v = findvar(iamarray(a), sidx);
 773        setvar_u(v, s);
 774}
 775
 776/* assign numeric value to variable */
 777static var *setvar_i(var *v, double value)
 778{
 779        clrvar(v);
 780        v->type |= VF_NUMBER;
 781        v->number = value;
 782        handle_special(v);
 783        return v;
 784}
 785
 786static const char *getvar_s(var *v)
 787{
 788        /* if v is numeric and has no cached string, convert it to string */
 789        if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
 790                fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
 791                v->string = xstrdup(g_buf);
 792                v->type |= VF_CACHED;
 793        }
 794        return (v->string == NULL) ? "" : v->string;
 795}
 796
 797static double getvar_i(var *v)
 798{
 799        char *s;
 800
 801        if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
 802                v->number = 0;
 803                s = v->string;
 804                if (s && *s) {
 805                        v->number = my_strtod(&s);
 806                        if (v->type & VF_USER) {
 807                                skip_spaces(&s);
 808                                if (*s != '\0')
 809                                        v->type &= ~VF_USER;
 810                        }
 811                } else {
 812                        v->type &= ~VF_USER;
 813                }
 814                v->type |= VF_CACHED;
 815        }
 816        return v->number;
 817}
 818
 819/* Used for operands of bitwise ops */
 820static unsigned long getvar_i_int(var *v)
 821{
 822        double d = getvar_i(v);
 823
 824        /* Casting doubles to longs is undefined for values outside
 825         * of target type range. Try to widen it as much as possible */
 826        if (d >= 0)
 827                return (unsigned long)d;
 828        /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
 829        return - (long) (unsigned long) (-d);
 830}
 831
 832static var *copyvar(var *dest, const var *src)
 833{
 834        if (dest != src) {
 835                clrvar(dest);
 836                dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
 837                dest->number = src->number;
 838                if (src->string)
 839                        dest->string = xstrdup(src->string);
 840        }
 841        handle_special(dest);
 842        return dest;
 843}
 844
 845static var *incvar(var *v)
 846{
 847        return setvar_i(v, getvar_i(v) + 1.);
 848}
 849
 850/* return true if v is number or numeric string */
 851static int is_numeric(var *v)
 852{
 853        getvar_i(v);
 854        return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
 855}
 856
 857/* return 1 when value of v corresponds to true, 0 otherwise */
 858static int istrue(var *v)
 859{
 860        if (is_numeric(v))
 861                return (v->number == 0) ? 0 : 1;
 862        return (v->string && *(v->string)) ? 1 : 0;
 863}
 864
 865/* temporary variables allocator. Last allocated should be first freed */
 866static var *nvalloc(int n)
 867{
 868        nvblock *pb = NULL;
 869        var *v, *r;
 870        int size;
 871
 872        while (g_cb) {
 873                pb = g_cb;
 874                if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
 875                g_cb = g_cb->next;
 876        }
 877
 878        if (!g_cb) {
 879                size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
 880                g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
 881                g_cb->size = size;
 882                g_cb->pos = g_cb->nv;
 883                g_cb->prev = pb;
 884                /*g_cb->next = NULL; - xzalloc did it */
 885                if (pb) pb->next = g_cb;
 886        }
 887
 888        v = r = g_cb->pos;
 889        g_cb->pos += n;
 890
 891        while (v < g_cb->pos) {
 892                v->type = 0;
 893                v->string = NULL;
 894                v++;
 895        }
 896
 897        return r;
 898}
 899
 900static void nvfree(var *v)
 901{
 902        var *p;
 903
 904        if (v < g_cb->nv || v >= g_cb->pos)
 905                syntax_error(EMSG_INTERNAL_ERROR);
 906
 907        for (p = v; p < g_cb->pos; p++) {
 908                if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
 909                        clear_array(iamarray(p));
 910                        free(p->x.array->items);
 911                        free(p->x.array);
 912                }
 913                if (p->type & VF_WALK)
 914                        free(p->x.walker);
 915
 916                clrvar(p);
 917        }
 918
 919        g_cb->pos = v;
 920        while (g_cb->prev && g_cb->pos == g_cb->nv) {
 921                g_cb = g_cb->prev;
 922        }
 923}
 924
 925/* ------- awk program text parsing ------- */
 926
 927/* Parse next token pointed by global pos, place results into global ttt.
 928 * If token isn't expected, give away. Return token class
 929 */
 930static uint32_t next_token(uint32_t expected)
 931{
 932#define concat_inserted (G.next_token__concat_inserted)
 933#define save_tclass     (G.next_token__save_tclass)
 934#define save_info       (G.next_token__save_info)
 935/* Initialized to TC_OPTERM: */
 936#define ltclass         (G.next_token__ltclass)
 937
 938        char *p, *pp, *s;
 939        const char *tl;
 940        uint32_t tc;
 941        const uint32_t *ti;
 942        int l;
 943
 944        if (t_rollback) {
 945                t_rollback = FALSE;
 946
 947        } else if (concat_inserted) {
 948                concat_inserted = FALSE;
 949                t_tclass = save_tclass;
 950                t_info = save_info;
 951
 952        } else {
 953                p = g_pos;
 954 readnext:
 955                skip_spaces(&p);
 956                g_lineno = t_lineno;
 957                if (*p == '#')
 958                        while (*p != '\n' && *p != '\0')
 959                                p++;
 960
 961                if (*p == '\n')
 962                        t_lineno++;
 963
 964                if (*p == '\0') {
 965                        tc = TC_EOF;
 966
 967                } else if (*p == '\"') {
 968                        /* it's a string */
 969                        t_string = s = ++p;
 970                        while (*p != '\"') {
 971                                if (*p == '\0' || *p == '\n')
 972                                        syntax_error(EMSG_UNEXP_EOS);
 973                                *(s++) = nextchar(&p);
 974                        }
 975                        p++;
 976                        *s = '\0';
 977                        tc = TC_STRING;
 978
 979                } else if ((expected & TC_REGEXP) && *p == '/') {
 980                        /* it's regexp */
 981                        t_string = s = ++p;
 982                        while (*p != '/') {
 983                                if (*p == '\0' || *p == '\n')
 984                                        syntax_error(EMSG_UNEXP_EOS);
 985                                *s = *p++;
 986                                if (*s++ == '\\') {
 987                                        pp = p;
 988                                        *(s-1) = bb_process_escape_sequence((const char **)&p);
 989                                        if (*pp == '\\')
 990                                                *s++ = '\\';
 991                                        if (p == pp)
 992                                                *s++ = *p++;
 993                                }
 994                        }
 995                        p++;
 996                        *s = '\0';
 997                        tc = TC_REGEXP;
 998
 999                } else if (*p == '.' || isdigit(*p)) {
1000                        /* it's a number */
1001                        t_double = my_strtod(&p);
1002                        if (*p == '.')
1003                                syntax_error(EMSG_UNEXP_TOKEN);
1004                        tc = TC_NUMBER;
1005
1006                } else {
1007                        /* search for something known */
1008                        tl = tokenlist;
1009                        tc = 0x00000001;
1010                        ti = tokeninfo;
1011                        while (*tl) {
1012                                l = *(tl++);
1013                                if (l == NTCC) {
1014                                        tc <<= 1;
1015                                        continue;
1016                                }
1017                                /* if token class is expected, token
1018                                 * matches and it's not a longer word,
1019                                 * then this is what we are looking for
1020                                 */
1021                                if ((tc & (expected | TC_WORD | TC_NEWLINE))
1022                                 && *tl == *p && strncmp(p, tl, l) == 0
1023                                 && !((tc & TC_WORD) && isalnum_(p[l]))
1024                                ) {
1025                                        t_info = *ti;
1026                                        p += l;
1027                                        break;
1028                                }
1029                                ti++;
1030                                tl += l;
1031                        }
1032
1033                        if (!*tl) {
1034                                /* it's a name (var/array/function),
1035                                 * otherwise it's something wrong
1036                                 */
1037                                if (!isalnum_(*p))
1038                                        syntax_error(EMSG_UNEXP_TOKEN);
1039
1040                                t_string = --p;
1041                                while (isalnum_(*(++p))) {
1042                                        *(p-1) = *p;
1043                                }
1044                                *(p-1) = '\0';
1045                                tc = TC_VARIABLE;
1046                                /* also consume whitespace between functionname and bracket */
1047                                if (!(expected & TC_VARIABLE))
1048                                        skip_spaces(&p);
1049                                if (*p == '(') {
1050                                        tc = TC_FUNCTION;
1051                                } else {
1052                                        if (*p == '[') {
1053                                                p++;
1054                                                tc = TC_ARRAY;
1055                                        }
1056                                }
1057                        }
1058                }
1059                g_pos = p;
1060
1061                /* skipping newlines in some cases */
1062                if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1063                        goto readnext;
1064
1065                /* insert concatenation operator when needed */
1066                if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1067                        concat_inserted = TRUE;
1068                        save_tclass = tc;
1069                        save_info = t_info;
1070                        tc = TC_BINOP;
1071                        t_info = OC_CONCAT | SS | P(35);
1072                }
1073
1074                t_tclass = tc;
1075        }
1076        ltclass = t_tclass;
1077
1078        /* Are we ready for this? */
1079        if (!(ltclass & expected))
1080                syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1081                                EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1082
1083        return ltclass;
1084#undef concat_inserted
1085#undef save_tclass
1086#undef save_info
1087#undef ltclass
1088}
1089
1090static void rollback_token(void)
1091{
1092        t_rollback = TRUE;
1093}
1094
1095static node *new_node(uint32_t info)
1096{
1097        node *n;
1098
1099        n = xzalloc(sizeof(node));
1100        n->info = info;
1101        n->lineno = g_lineno;
1102        return n;
1103}
1104
1105static node *mk_re_node(const char *s, node *n, regex_t *re)
1106{
1107        n->info = OC_REGEXP;
1108        n->l.re = re;
1109        n->r.ire = re + 1;
1110        xregcomp(re, s, REG_EXTENDED);
1111        xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1112
1113        return n;
1114}
1115
1116static node *condition(void)
1117{
1118        next_token(TC_SEQSTART);
1119        return parse_expr(TC_SEQTERM);
1120}
1121
1122/* parse expression terminated by given argument, return ptr
1123 * to built subtree. Terminator is eaten by parse_expr */
1124static node *parse_expr(uint32_t iexp)
1125{
1126        node sn;
1127        node *cn = &sn;
1128        node *vn, *glptr;
1129        uint32_t tc, xtc;
1130        var *v;
1131
1132        sn.info = PRIMASK;
1133        sn.r.n = glptr = NULL;
1134        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1135
1136        while (!((tc = next_token(xtc)) & iexp)) {
1137                if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1138                        /* input redirection (<) attached to glptr node */
1139                        cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1140                        cn->a.n = glptr;
1141                        xtc = TC_OPERAND | TC_UOPPRE;
1142                        glptr = NULL;
1143
1144                } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1145                        /* for binary and postfix-unary operators, jump back over
1146                         * previous operators with higher priority */
1147                        vn = cn;
1148                        while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1149                         || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1150                                vn = vn->a.n;
1151                        if ((t_info & OPCLSMASK) == OC_TERNARY)
1152                                t_info += P(6);
1153                        cn = vn->a.n->r.n = new_node(t_info);
1154                        cn->a.n = vn->a.n;
1155                        if (tc & TC_BINOP) {
1156                                cn->l.n = vn;
1157                                xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1158                                if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1159                                        /* it's a pipe */
1160                                        next_token(TC_GETLINE);
1161                                        /* give maximum priority to this pipe */
1162                                        cn->info &= ~PRIMASK;
1163                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1164                                }
1165                        } else {
1166                                cn->r.n = vn;
1167                                xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1168                        }
1169                        vn->a.n = cn;
1170
1171                } else {
1172                        /* for operands and prefix-unary operators, attach them
1173                         * to last node */
1174                        vn = cn;
1175                        cn = vn->r.n = new_node(t_info);
1176                        cn->a.n = vn;
1177                        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1178                        if (tc & (TC_OPERAND | TC_REGEXP)) {
1179                                xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1180                                /* one should be very careful with switch on tclass -
1181                                 * only simple tclasses should be used! */
1182                                switch (tc) {
1183                                case TC_VARIABLE:
1184                                case TC_ARRAY:
1185                                        cn->info = OC_VAR;
1186                                        v = hash_search(ahash, t_string);
1187                                        if (v != NULL) {
1188                                                cn->info = OC_FNARG;
1189                                                cn->l.i = v->x.aidx;
1190                                        } else {
1191                                                cn->l.v = newvar(t_string);
1192                                        }
1193                                        if (tc & TC_ARRAY) {
1194                                                cn->info |= xS;
1195                                                cn->r.n = parse_expr(TC_ARRTERM);
1196                                        }
1197                                        break;
1198
1199                                case TC_NUMBER:
1200                                case TC_STRING:
1201                                        cn->info = OC_VAR;
1202                                        v = cn->l.v = xzalloc(sizeof(var));
1203                                        if (tc & TC_NUMBER)
1204                                                setvar_i(v, t_double);
1205                                        else
1206                                                setvar_s(v, t_string);
1207                                        break;
1208
1209                                case TC_REGEXP:
1210                                        mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1211                                        break;
1212
1213                                case TC_FUNCTION:
1214                                        cn->info = OC_FUNC;
1215                                        cn->r.f = newfunc(t_string);
1216                                        cn->l.n = condition();
1217                                        break;
1218
1219                                case TC_SEQSTART:
1220                                        cn = vn->r.n = parse_expr(TC_SEQTERM);
1221                                        cn->a.n = vn;
1222                                        break;
1223
1224                                case TC_GETLINE:
1225                                        glptr = cn;
1226                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1227                                        break;
1228
1229                                case TC_BUILTIN:
1230                                        cn->l.n = condition();
1231                                        break;
1232                                }
1233                        }
1234                }
1235        }
1236        return sn.r.n;
1237}
1238
1239/* add node to chain. Return ptr to alloc'd node */
1240static node *chain_node(uint32_t info)
1241{
1242        node *n;
1243
1244        if (!seq->first)
1245                seq->first = seq->last = new_node(0);
1246
1247        if (seq->programname != g_progname) {
1248                seq->programname = g_progname;
1249                n = chain_node(OC_NEWSOURCE);
1250                n->l.s = xstrdup(g_progname);
1251        }
1252
1253        n = seq->last;
1254        n->info = info;
1255        seq->last = n->a.n = new_node(OC_DONE);
1256
1257        return n;
1258}
1259
1260static void chain_expr(uint32_t info)
1261{
1262        node *n;
1263
1264        n = chain_node(info);
1265        n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1266        if (t_tclass & TC_GRPTERM)
1267                rollback_token();
1268}
1269
1270static node *chain_loop(node *nn)
1271{
1272        node *n, *n2, *save_brk, *save_cont;
1273
1274        save_brk = break_ptr;
1275        save_cont = continue_ptr;
1276
1277        n = chain_node(OC_BR | Vx);
1278        continue_ptr = new_node(OC_EXEC);
1279        break_ptr = new_node(OC_EXEC);
1280        chain_group();
1281        n2 = chain_node(OC_EXEC | Vx);
1282        n2->l.n = nn;
1283        n2->a.n = n;
1284        continue_ptr->a.n = n2;
1285        break_ptr->a.n = n->r.n = seq->last;
1286
1287        continue_ptr = save_cont;
1288        break_ptr = save_brk;
1289
1290        return n;
1291}
1292
1293/* parse group and attach it to chain */
1294static void chain_group(void)
1295{
1296        uint32_t c;
1297        node *n, *n2, *n3;
1298
1299        do {
1300                c = next_token(TC_GRPSEQ);
1301        } while (c & TC_NEWLINE);
1302
1303        if (c & TC_GRPSTART) {
1304                while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1305                        if (t_tclass & TC_NEWLINE) continue;
1306                        rollback_token();
1307                        chain_group();
1308                }
1309        } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1310                rollback_token();
1311                chain_expr(OC_EXEC | Vx);
1312        } else {                                                /* TC_STATEMNT */
1313                switch (t_info & OPCLSMASK) {
1314                case ST_IF:
1315                        n = chain_node(OC_BR | Vx);
1316                        n->l.n = condition();
1317                        chain_group();
1318                        n2 = chain_node(OC_EXEC);
1319                        n->r.n = seq->last;
1320                        if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1321                                chain_group();
1322                                n2->a.n = seq->last;
1323                        } else {
1324                                rollback_token();
1325                        }
1326                        break;
1327
1328                case ST_WHILE:
1329                        n2 = condition();
1330                        n = chain_loop(NULL);
1331                        n->l.n = n2;
1332                        break;
1333
1334                case ST_DO:
1335                        n2 = chain_node(OC_EXEC);
1336                        n = chain_loop(NULL);
1337                        n2->a.n = n->a.n;
1338                        next_token(TC_WHILE);
1339                        n->l.n = condition();
1340                        break;
1341
1342                case ST_FOR:
1343                        next_token(TC_SEQSTART);
1344                        n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1345                        if (t_tclass & TC_SEQTERM) {    /* for-in */
1346                                if ((n2->info & OPCLSMASK) != OC_IN)
1347                                        syntax_error(EMSG_UNEXP_TOKEN);
1348                                n = chain_node(OC_WALKINIT | VV);
1349                                n->l.n = n2->l.n;
1350                                n->r.n = n2->r.n;
1351                                n = chain_loop(NULL);
1352                                n->info = OC_WALKNEXT | Vx;
1353                                n->l.n = n2->l.n;
1354                        } else {                        /* for (;;) */
1355                                n = chain_node(OC_EXEC | Vx);
1356                                n->l.n = n2;
1357                                n2 = parse_expr(TC_SEMICOL);
1358                                n3 = parse_expr(TC_SEQTERM);
1359                                n = chain_loop(n3);
1360                                n->l.n = n2;
1361                                if (!n2)
1362                                        n->info = OC_EXEC;
1363                        }
1364                        break;
1365
1366                case OC_PRINT:
1367                case OC_PRINTF:
1368                        n = chain_node(t_info);
1369                        n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1370                        if (t_tclass & TC_OUTRDR) {
1371                                n->info |= t_info;
1372                                n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1373                        }
1374                        if (t_tclass & TC_GRPTERM)
1375                                rollback_token();
1376                        break;
1377
1378                case OC_BREAK:
1379                        n = chain_node(OC_EXEC);
1380                        n->a.n = break_ptr;
1381                        break;
1382
1383                case OC_CONTINUE:
1384                        n = chain_node(OC_EXEC);
1385                        n->a.n = continue_ptr;
1386                        break;
1387
1388                /* delete, next, nextfile, return, exit */
1389                default:
1390                        chain_expr(t_info);
1391                }
1392        }
1393}
1394
1395static void parse_program(char *p)
1396{
1397        uint32_t tclass;
1398        node *cn;
1399        func *f;
1400        var *v;
1401
1402        g_pos = p;
1403        t_lineno = 1;
1404        while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1405                        TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1406
1407                if (tclass & TC_OPTERM)
1408                        continue;
1409
1410                seq = &mainseq;
1411                if (tclass & TC_BEGIN) {
1412                        seq = &beginseq;
1413                        chain_group();
1414
1415                } else if (tclass & TC_END) {
1416                        seq = &endseq;
1417                        chain_group();
1418
1419                } else if (tclass & TC_FUNCDECL) {
1420                        next_token(TC_FUNCTION);
1421                        g_pos++;
1422                        f = newfunc(t_string);
1423                        f->body.first = NULL;
1424                        f->nargs = 0;
1425                        while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1426                                v = findvar(ahash, t_string);
1427                                v->x.aidx = (f->nargs)++;
1428
1429                                if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1430                                        break;
1431                        }
1432                        seq = &(f->body);
1433                        chain_group();
1434                        clear_array(ahash);
1435
1436                } else if (tclass & TC_OPSEQ) {
1437                        rollback_token();
1438                        cn = chain_node(OC_TEST);
1439                        cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1440                        if (t_tclass & TC_GRPSTART) {
1441                                rollback_token();
1442                                chain_group();
1443                        } else {
1444                                chain_node(OC_PRINT);
1445                        }
1446                        cn->r.n = mainseq.last;
1447
1448                } else /* if (tclass & TC_GRPSTART) */ {
1449                        rollback_token();
1450                        chain_group();
1451                }
1452        }
1453}
1454
1455
1456/* -------- program execution part -------- */
1457
1458static node *mk_splitter(const char *s, tsplitter *spl)
1459{
1460        regex_t *re, *ire;
1461        node *n;
1462
1463        re = &spl->re[0];
1464        ire = &spl->re[1];
1465        n = &spl->n;
1466        if ((n->info & OPCLSMASK) == OC_REGEXP) {
1467                regfree(re);
1468                regfree(ire); // TODO: nuke ire, use re+1?
1469        }
1470        if (strlen(s) > 1) {
1471                mk_re_node(s, n, re);
1472        } else {
1473                n->info = (uint32_t) *s;
1474        }
1475
1476        return n;
1477}
1478
1479/* use node as a regular expression. Supplied with node ptr and regex_t
1480 * storage space. Return ptr to regex (if result points to preg, it should
1481 * be later regfree'd manually
1482 */
1483static regex_t *as_regex(node *op, regex_t *preg)
1484{
1485        var *v;
1486        const char *s;
1487
1488        if ((op->info & OPCLSMASK) == OC_REGEXP) {
1489                return icase ? op->r.ire : op->l.re;
1490        }
1491        v = nvalloc(1);
1492        s = getvar_s(evaluate(op, v));
1493        xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1494        nvfree(v);
1495        return preg;
1496}
1497
/* Grow-on-demand buffer helper.
 * Ensures *b is allocated and that index n lies below *size. When *b is
 * NULL or n >= *size, the buffer is reallocated to n + n/2 + 80 bytes and
 * *size is updated. *size is not read while *b is NULL. */
static void qrealloc(char **b, int n, int *size)
{
        if (*b != NULL && n < *size)
                return;         /* still fits */

        *size = n + (n>>1) + 80;
        *b = xrealloc(*b, *size);
}
1506
1507/* resize field storage space */
1508static void fsrealloc(int size)
1509{
1510        int i;
1511
1512        if (size >= maxfields) {
1513                i = maxfields;
1514                maxfields = size + 16;
1515                Fields = xrealloc(Fields, maxfields * sizeof(var));
1516                for (; i < maxfields; i++) {
1517                        Fields[i].type = VF_SPECIAL;
1518                        Fields[i].string = NULL;
1519                }
1520        }
1521
1522        if (size < nfields) {
1523                for (i = size; i < nfields; i++) {
1524                        clrvar(Fields + i);
1525                }
1526        }
1527        nfields = size;
1528}
1529
1530static int awk_split(const char *s, node *spl, char **slist)
1531{
1532        int l, n = 0;
1533        char c[4];
1534        char *s1;
1535        regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1536
1537        /* in worst case, each char would be a separate field */
1538        *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1539        strcpy(s1, s);
1540
1541        c[0] = c[1] = (char)spl->info;
1542        c[2] = c[3] = '\0';
1543        if (*getvar_s(intvar[RS]) == '\0')
1544                c[2] = '\n';
1545
1546        if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1547                if (!*s)
1548                        return n; /* "": zero fields */
1549                n++; /* at least one field will be there */
1550                do {
1551                        l = strcspn(s, c+2); /* len till next NUL or \n */
1552                        if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1553                         && pmatch[0].rm_so <= l
1554                        ) {
1555                                l = pmatch[0].rm_so;
1556                                if (pmatch[0].rm_eo == 0) {
1557                                        l++;
1558                                        pmatch[0].rm_eo++;
1559                                }
1560                                n++; /* we saw yet another delimiter */
1561                        } else {
1562                                pmatch[0].rm_eo = l;
1563                                if (s[l]) pmatch[0].rm_eo++;
1564                        }
1565                        memcpy(s1, s, l);
1566                        s1[l] = '\0';
1567                        nextword(&s1);
1568                        s += pmatch[0].rm_eo;
1569                } while (*s);
1570                return n;
1571        }
1572        if (c[0] == '\0') {  /* null split */
1573                while (*s) {
1574                        *s1++ = *s++;
1575                        *s1++ = '\0';
1576                        n++;
1577                }
1578                return n;
1579        }
1580        if (c[0] != ' ') {  /* single-character split */
1581                if (icase) {
1582                        c[0] = toupper(c[0]);
1583                        c[1] = tolower(c[1]);
1584                }
1585                if (*s1) n++;
1586                while ((s1 = strpbrk(s1, c))) {
1587                        *s1++ = '\0';
1588                        n++;
1589                }
1590                return n;
1591        }
1592        /* space split */
1593        while (*s) {
1594                s = skip_whitespace(s);
1595                if (!*s) break;
1596                n++;
1597                while (*s && !isspace(*s))
1598                        *s1++ = *s++;
1599                *s1++ = '\0';
1600        }
1601        return n;
1602}
1603
1604static void split_f0(void)
1605{
1606/* static char *fstrings; */
1607#define fstrings (G.split_f0__fstrings)
1608
1609        int i, n;
1610        char *s;
1611
1612        if (is_f0_split)
1613                return;
1614
1615        is_f0_split = TRUE;
1616        free(fstrings);
1617        fsrealloc(0);
1618        n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1619        fsrealloc(n);
1620        s = fstrings;
1621        for (i = 0; i < n; i++) {
1622                Fields[i].string = nextword(&s);
1623                Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1624        }
1625
1626        /* set NF manually to avoid side effects */
1627        clrvar(intvar[NF]);
1628        intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1629        intvar[NF]->number = nfields;
1630#undef fstrings
1631}
1632
1633/* perform additional actions when some internal variables changed */
1634static void handle_special(var *v)
1635{
1636        int n;
1637        char *b;
1638        const char *sep, *s;
1639        int sl, l, len, i, bsize;
1640
1641        if (!(v->type & VF_SPECIAL))
1642                return;
1643
1644        if (v == intvar[NF]) {
1645                n = (int)getvar_i(v);
1646                fsrealloc(n);
1647
1648                /* recalculate $0 */
1649                sep = getvar_s(intvar[OFS]);
1650                sl = strlen(sep);
1651                b = NULL;
1652                len = 0;
1653                for (i = 0; i < n; i++) {
1654                        s = getvar_s(&Fields[i]);
1655                        l = strlen(s);
1656                        if (b) {
1657                                memcpy(b+len, sep, sl);
1658                                len += sl;
1659                        }
1660                        qrealloc(&b, len+l+sl, &bsize);
1661                        memcpy(b+len, s, l);
1662                        len += l;
1663                }
1664                if (b)
1665                        b[len] = '\0';
1666                setvar_p(intvar[F0], b);
1667                is_f0_split = TRUE;
1668
1669        } else if (v == intvar[F0]) {
1670                is_f0_split = FALSE;
1671
1672        } else if (v == intvar[FS]) {
1673                mk_splitter(getvar_s(v), &fsplitter);
1674
1675        } else if (v == intvar[RS]) {
1676                mk_splitter(getvar_s(v), &rsplitter);
1677
1678        } else if (v == intvar[IGNORECASE]) {
1679                icase = istrue(v);
1680
1681        } else {                                /* $n */
1682                n = getvar_i(intvar[NF]);
1683                setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1684                /* right here v is invalid. Just to note... */
1685        }
1686}
1687
1688/* step through func/builtin/etc arguments */
1689static node *nextarg(node **pn)
1690{
1691        node *n;
1692
1693        n = *pn;
1694        if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1695                *pn = n->r.n;
1696                n = n->l.n;
1697        } else {
1698                *pn = NULL;
1699        }
1700        return n;
1701}
1702
1703static void hashwalk_init(var *v, xhash *array)
1704{
1705        char **w;
1706        hash_item *hi;
1707        unsigned i;
1708
1709        if (v->type & VF_WALK)
1710                free(v->x.walker);
1711
1712        v->type |= VF_WALK;
1713        w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1714        w[0] = w[1] = (char *)(w + 2);
1715        for (i = 0; i < array->csize; i++) {
1716                hi = array->items[i];
1717                while (hi) {
1718                        strcpy(*w, hi->name);
1719                        nextword(w);
1720                        hi = hi->next;
1721                }
1722        }
1723}
1724
1725static int hashwalk_next(var *v)
1726{
1727        char **w;
1728
1729        w = v->x.walker;
1730        if (w[1] == w[0])
1731                return FALSE;
1732
1733        setvar_s(v, nextword(w+1));
1734        return TRUE;
1735}
1736
1737/* evaluate node, return 1 when result is true, 0 otherwise */
1738static int ptest(node *pattern)
1739{
1740        /* ptest__v is "static": to save stack space? */
1741        return istrue(evaluate(pattern, &G.ptest__v));
1742}
1743
1744/* read next record from stream rsm into a variable v */
1745static int awk_getline(rstream *rsm, var *v)
1746{
1747        char *b;
1748        regmatch_t pmatch[2];
1749        int a, p, pp=0, size;
1750        int fd, so, eo, r, rp;
1751        char c, *m, *s;
1752
1753        /* we're using our own buffer since we need access to accumulating
1754         * characters
1755         */
1756        fd = fileno(rsm->F);
1757        m = rsm->buffer;
1758        a = rsm->adv;
1759        p = rsm->pos;
1760        size = rsm->size;
1761        c = (char) rsplitter.n.info;
1762        rp = 0;
1763
1764        if (!m) qrealloc(&m, 256, &size);
1765        do {
1766                b = m + a;
1767                so = eo = p;
1768                r = 1;
1769                if (p > 0) {
1770                        if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1771                                if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1772                                                        b, 1, pmatch, 0) == 0) {
1773                                        so = pmatch[0].rm_so;
1774                                        eo = pmatch[0].rm_eo;
1775                                        if (b[eo] != '\0')
1776                                                break;
1777                                }
1778                        } else if (c != '\0') {
1779                                s = strchr(b+pp, c);
1780                                if (!s) s = memchr(b+pp, '\0', p - pp);
1781                                if (s) {
1782                                        so = eo = s-b;
1783                                        eo++;
1784                                        break;
1785                                }
1786                        } else {
1787                                while (b[rp] == '\n')
1788                                        rp++;
1789                                s = strstr(b+rp, "\n\n");
1790                                if (s) {
1791                                        so = eo = s-b;
1792                                        while (b[eo] == '\n') eo++;
1793                                        if (b[eo] != '\0')
1794                                                break;
1795                                }
1796                        }
1797                }
1798
1799                if (a > 0) {
1800                        memmove(m, (const void *)(m+a), p+1);
1801                        b = m;
1802                        a = 0;
1803                }
1804
1805                qrealloc(&m, a+p+128, &size);
1806                b = m + a;
1807                pp = p;
1808                p += safe_read(fd, b+p, size-p-1);
1809                if (p < pp) {
1810                        p = 0;
1811                        r = 0;
1812                        setvar_i(intvar[ERRNO], errno);
1813                }
1814                b[p] = '\0';
1815
1816        } while (p > pp);
1817
1818        if (p == 0) {
1819                r--;
1820        } else {
1821                c = b[so]; b[so] = '\0';
1822                setvar_s(v, b+rp);
1823                v->type |= VF_USER;
1824                b[so] = c;
1825                c = b[eo]; b[eo] = '\0';
1826                setvar_s(intvar[RT], b+so);
1827                b[eo] = c;
1828        }
1829
1830        rsm->buffer = m;
1831        rsm->adv = a + eo;
1832        rsm->pos = p - eo;
1833        rsm->size = size;
1834
1835        return r;
1836}
1837
1838static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1839{
1840        int r = 0;
1841        char c;
1842        const char *s = format;
1843
1844        if (int_as_int && n == (int)n) {
1845                r = snprintf(b, size, "%d", (int)n);
1846        } else {
1847                do { c = *s; } while (c && *++s);
1848                if (strchr("diouxX", c)) {
1849                        r = snprintf(b, size, format, (int)n);
1850                } else if (strchr("eEfgG", c)) {
1851                        r = snprintf(b, size, format, n);
1852                } else {
1853                        syntax_error(EMSG_INV_FMT);
1854                }
1855        }
1856        return r;
1857}
1858
1859
1860/* formatted output into an allocated buffer, return ptr to buffer */
1861static char *awk_printf(node *n)
1862{
1863        char *b = NULL;
1864        char *fmt, *s, *f;
1865        const char *s1;
1866        int i, j, incr, bsize;
1867        char c, c1;
1868        var *v, *arg;
1869
1870        v = nvalloc(1);
1871        fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1872
1873        i = 0;
1874        while (*f) {
1875                s = f;
1876                while (*f && (*f != '%' || *(++f) == '%'))
1877                        f++;
1878                while (*f && !isalpha(*f)) {
1879                        if (*f == '*')
1880                                syntax_error("%*x formats are not supported");
1881                        f++;
1882                }
1883
1884                incr = (f - s) + MAXVARFMT;
1885                qrealloc(&b, incr + i, &bsize);
1886                c = *f;
1887                if (c != '\0') f++;
1888                c1 = *f;
1889                *f = '\0';
1890                arg = evaluate(nextarg(&n), v);
1891
1892                j = i;
1893                if (c == 'c' || !c) {
1894                        i += sprintf(b+i, s, is_numeric(arg) ?
1895                                        (char)getvar_i(arg) : *getvar_s(arg));
1896                } else if (c == 's') {
1897                        s1 = getvar_s(arg);
1898                        qrealloc(&b, incr+i+strlen(s1), &bsize);
1899                        i += sprintf(b+i, s, s1);
1900                } else {
1901                        i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1902                }
1903                *f = c1;
1904
1905                /* if there was an error while sprintf, return value is negative */
1906                if (i < j) i = j;
1907        }
1908
1909        b = xrealloc(b, i + 1);
1910        free(fmt);
1911        nvfree(v);
1912        b[i] = '\0';
1913        return b;
1914}
1915
1916/* common substitution routine
1917 * replace (nm) substring of (src) that match (n) with (repl), store
1918 * result into (dest), return number of substitutions. If nm=0, replace
1919 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1920 * subexpression matching (\1-\9)
1921 */
1922static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1923{
1924        char *ds = NULL;
1925        const char *s;
1926        const char *sp;
1927        int c, i, j, di, rl, so, eo, nbs, n, dssize;
1928        regmatch_t pmatch[10];
1929        regex_t sreg, *re;
1930
1931        re = as_regex(rn, &sreg);
1932        if (!src) src = intvar[F0];
1933        if (!dest) dest = intvar[F0];
1934
1935        i = di = 0;
1936        sp = getvar_s(src);
1937        rl = strlen(repl);
1938        while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1939                so = pmatch[0].rm_so;
1940                eo = pmatch[0].rm_eo;
1941
1942                qrealloc(&ds, di + eo + rl, &dssize);
1943                memcpy(ds + di, sp, eo);
1944                di += eo;
1945                if (++i >= nm) {
1946                        /* replace */
1947                        di -= (eo - so);
1948                        nbs = 0;
1949                        for (s = repl; *s; s++) {
1950                                ds[di++] = c = *s;
1951                                if (c == '\\') {
1952                                        nbs++;
1953                                        continue;
1954                                }
1955                                if (c == '&' || (ex && c >= '0' && c <= '9')) {
1956                                        di -= ((nbs + 3) >> 1);
1957                                        j = 0;
1958                                        if (c != '&') {
1959                                                j = c - '0';
1960                                                nbs++;
1961                                        }
1962                                        if (nbs % 2) {
1963                                                ds[di++] = c;
1964                                        } else {
1965                                                n = pmatch[j].rm_eo - pmatch[j].rm_so;
1966                                                qrealloc(&ds, di + rl + n, &dssize);
1967                                                memcpy(ds + di, sp + pmatch[j].rm_so, n);
1968                                                di += n;
1969                                        }
1970                                }
1971                                nbs = 0;
1972                        }
1973                }
1974
1975                sp += eo;
1976                if (i == nm) break;
1977                if (eo == so) {
1978                        ds[di] = *sp++;
1979                        if (!ds[di++]) break;
1980                }
1981        }
1982
1983        qrealloc(&ds, di + strlen(sp), &dssize);
1984        strcpy(ds + di, sp);
1985        setvar_p(dest, ds);
1986        if (re == &sreg) regfree(re);
1987        return i;
1988}
1989
1990static var *exec_builtin(node *op, var *res)
1991{
1992#define tspl (G.exec_builtin__tspl)
1993
1994        int (*to_xxx)(int);
1995        var *tv;
1996        node *an[4];
1997        var *av[4];
1998        const char *as[4];
1999        regmatch_t pmatch[2];
2000        regex_t sreg, *re;
2001        node *spl;
2002        uint32_t isr, info;
2003        int nargs;
2004        time_t tt;
2005        char *s, *s1;
2006        int i, l, ll, n;
2007
2008        tv = nvalloc(4);
2009        isr = info = op->info;
2010        op = op->l.n;
2011
2012        av[2] = av[3] = NULL;
2013        for (i = 0; i < 4 && op; i++) {
2014                an[i] = nextarg(&op);
2015                if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
2016                if (isr & 0x08000000) as[i] = getvar_s(av[i]);
2017                isr >>= 1;
2018        }
2019
2020        nargs = i;
2021        if ((uint32_t)nargs < (info >> 30))
2022                syntax_error(EMSG_TOO_FEW_ARGS);
2023
2024        switch (info & OPNMASK) {
2025
2026        case B_a2:
2027#if ENABLE_FEATURE_AWK_LIBM
2028                setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2029#else
2030                syntax_error(EMSG_NO_MATH);
2031#endif
2032                break;
2033
2034        case B_sp:
2035                if (nargs > 2) {
2036                        spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2037                                an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2038                } else {
2039                        spl = &fsplitter.n;
2040                }
2041
2042                n = awk_split(as[0], spl, &s);
2043                s1 = s;
2044                clear_array(iamarray(av[1]));
2045                for (i = 1; i <= n; i++)
2046                        setari_u(av[1], i, nextword(&s1));
2047                free(s);
2048                setvar_i(res, n);
2049                break;
2050
2051        case B_ss:
2052                l = strlen(as[0]);
2053                i = getvar_i(av[1]) - 1;
2054                if (i > l) i = l;
2055                if (i < 0) i = 0;
2056                n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2057                if (n < 0) n = 0;
2058                s = xstrndup(as[0]+i, n);
2059                setvar_p(res, s);
2060                break;
2061
2062        /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2063         * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2064        case B_an:
2065                setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2066                break;
2067
2068        case B_co:
2069                setvar_i(res, ~getvar_i_int(av[0]));
2070                break;
2071
2072        case B_ls:
2073                setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2074                break;
2075
2076        case B_or:
2077                setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2078                break;
2079
2080        case B_rs:
2081                setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2082                break;
2083
2084        case B_xo:
2085                setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2086                break;
2087
2088        case B_lo:
2089                to_xxx = tolower;
2090                goto lo_cont;
2091
2092        case B_up:
2093                to_xxx = toupper;
2094 lo_cont:
2095                s1 = s = xstrdup(as[0]);
2096                while (*s1) {
2097                        *s1 = (*to_xxx)(*s1);
2098                        s1++;
2099                }
2100                setvar_p(res, s);
2101                break;
2102
2103        case B_ix:
2104                n = 0;
2105                ll = strlen(as[1]);
2106                l = strlen(as[0]) - ll;
2107                if (ll > 0 && l >= 0) {
2108                        if (!icase) {
2109                                s = strstr(as[0], as[1]);
2110                                if (s) n = (s - as[0]) + 1;
2111                        } else {
2112                                /* this piece of code is terribly slow and
2113                                 * really should be rewritten
2114                                 */
2115                                for (i=0; i<=l; i++) {
2116                                        if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2117                                                n = i+1;
2118                                                break;
2119                                        }
2120                                }
2121                        }
2122                }
2123                setvar_i(res, n);
2124                break;
2125
2126        case B_ti:
2127                if (nargs > 1)
2128                        tt = getvar_i(av[1]);
2129                else
2130                        time(&tt);
2131                //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2132                i = strftime(g_buf, MAXVARFMT,
2133                        ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2134                        localtime(&tt));
2135                g_buf[i] = '\0';
2136                setvar_s(res, g_buf);
2137                break;
2138
2139        case B_ma:
2140                re = as_regex(an[1], &sreg);
2141                n = regexec(re, as[0], 1, pmatch, 0);
2142                if (n == 0) {
2143                        pmatch[0].rm_so++;
2144                        pmatch[0].rm_eo++;
2145                } else {
2146                        pmatch[0].rm_so = 0;
2147                        pmatch[0].rm_eo = -1;
2148                }
2149                setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2150                setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2151                setvar_i(res, pmatch[0].rm_so);
2152                if (re == &sreg) regfree(re);
2153                break;
2154
2155        case B_ge:
2156                awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2157                break;
2158
2159        case B_gs:
2160                setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2161                break;
2162
2163        case B_su:
2164                setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2165                break;
2166        }
2167
2168        nvfree(tv);
2169        return res;
2170#undef tspl
2171}
2172
2173/*
2174 * Evaluate node - the heart of the program. Supplied with subtree
2175 * and place where to store result. returns ptr to result.
2176 */
2177#define XC(n) ((n) >> 8)
2178
2179static var *evaluate(node *op, var *res)
2180{
2181/* This procedure is recursive so we should count every byte */
2182#define fnargs (G.evaluate__fnargs)
2183/* seed is initialized to 1 */
2184#define seed   (G.evaluate__seed)
2185#define sreg   (G.evaluate__sreg)
2186
2187        node *op1;
2188        var *v1;
2189        union {
2190                var *v;
2191                const char *s;
2192                double d;
2193                int i;
2194        } L, R;
2195        uint32_t opinfo;
2196        int opn;
2197        union {
2198                char *s;
2199                rstream *rsm;
2200                FILE *F;
2201                var *v;
2202                regex_t *re;
2203                uint32_t info;
2204        } X;
2205
2206        if (!op)
2207                return setvar_s(res, NULL);
2208
2209        v1 = nvalloc(2);
2210
2211        while (op) {
2212                opinfo = op->info;
2213                opn = (opinfo & OPNMASK);
2214                g_lineno = op->lineno;
2215
2216                /* execute inevitable things */
2217                op1 = op->l.n;
2218                if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2219                if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2220                if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2221                if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2222                if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2223
2224                switch (XC(opinfo & OPCLSMASK)) {
2225
2226                /* -- iterative node type -- */
2227
2228                /* test pattern */
2229                case XC( OC_TEST ):
2230                        if ((op1->info & OPCLSMASK) == OC_COMMA) {
2231                                /* it's range pattern */
2232                                if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2233                                        op->info |= OF_CHECKED;
2234                                        if (ptest(op1->r.n))
2235                                                op->info &= ~OF_CHECKED;
2236
2237                                        op = op->a.n;
2238                                } else {
2239                                        op = op->r.n;
2240                                }
2241                        } else {
2242                                op = (ptest(op1)) ? op->a.n : op->r.n;
2243                        }
2244                        break;
2245
2246                /* just evaluate an expression, also used as unconditional jump */
2247                case XC( OC_EXEC ):
2248                        break;
2249
2250                /* branch, used in if-else and various loops */
2251                case XC( OC_BR ):
2252                        op = istrue(L.v) ? op->a.n : op->r.n;
2253                        break;
2254
2255                /* initialize for-in loop */
2256                case XC( OC_WALKINIT ):
2257                        hashwalk_init(L.v, iamarray(R.v));
2258                        break;
2259
2260                /* get next array item */
2261                case XC( OC_WALKNEXT ):
2262                        op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2263                        break;
2264
2265                case XC( OC_PRINT ):
2266                case XC( OC_PRINTF ):
2267                        X.F = stdout;
2268                        if (op->r.n) {
2269                                X.rsm = newfile(R.s);
2270                                if (!X.rsm->F) {
2271                                        if (opn == '|') {
2272                                                X.rsm->F = popen(R.s, "w");
2273                                                if (X.rsm->F == NULL)
2274                                                        bb_perror_msg_and_die("popen");
2275                                                X.rsm->is_pipe = 1;
2276                                        } else {
2277                                                X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2278                                        }
2279                                }
2280                                X.F = X.rsm->F;
2281                        }
2282
2283                        if ((opinfo & OPCLSMASK) == OC_PRINT) {
2284                                if (!op1) {
2285                                        fputs(getvar_s(intvar[F0]), X.F);
2286                                } else {
2287                                        while (op1) {
2288                                                L.v = evaluate(nextarg(&op1), v1);
2289                                                if (L.v->type & VF_NUMBER) {
2290                                                        fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2291                                                                        getvar_i(L.v), TRUE);
2292                                                        fputs(g_buf, X.F);
2293                                                } else {
2294                                                        fputs(getvar_s(L.v), X.F);
2295                                                }
2296
2297                                                if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2298                                        }
2299                                }
2300                                fputs(getvar_s(intvar[ORS]), X.F);
2301
2302                        } else {        /* OC_PRINTF */
2303                                L.s = awk_printf(op1);
2304                                fputs(L.s, X.F);
2305                                free((char*)L.s);
2306                        }
2307                        fflush(X.F);
2308                        break;
2309
2310                case XC( OC_DELETE ):
2311                        X.info = op1->info & OPCLSMASK;
2312                        if (X.info == OC_VAR) {
2313                                R.v = op1->l.v;
2314                        } else if (X.info == OC_FNARG) {
2315                                R.v = &fnargs[op1->l.i];
2316                        } else {
2317                                syntax_error(EMSG_NOT_ARRAY);
2318                        }
2319
2320                        if (op1->r.n) {
2321                                clrvar(L.v);
2322                                L.s = getvar_s(evaluate(op1->r.n, v1));
2323                                hash_remove(iamarray(R.v), L.s);
2324                        } else {
2325                                clear_array(iamarray(R.v));
2326                        }
2327                        break;
2328
2329                case XC( OC_NEWSOURCE ):
2330                        g_progname = op->l.s;
2331                        break;
2332
2333                case XC( OC_RETURN ):
2334                        copyvar(res, L.v);
2335                        break;
2336
2337                case XC( OC_NEXTFILE ):
2338                        nextfile = TRUE;
2339                case XC( OC_NEXT ):
2340                        nextrec = TRUE;
2341                case XC( OC_DONE ):
2342                        clrvar(res);
2343                        break;
2344
2345                case XC( OC_EXIT ):
2346                        awk_exit(L.d);
2347
2348                /* -- recursive node type -- */
2349
2350                case XC( OC_VAR ):
2351                        L.v = op->l.v;
2352                        if (L.v == intvar[NF])
2353                                split_f0();
2354                        goto v_cont;
2355
2356                case XC( OC_FNARG ):
2357                        L.v = &fnargs[op->l.i];
2358 v_cont:
2359                        res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2360                        break;
2361
2362                case XC( OC_IN ):
2363                        setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2364                        break;
2365
2366                case XC( OC_REGEXP ):
2367                        op1 = op;
2368                        L.s = getvar_s(intvar[F0]);
2369                        goto re_cont;
2370
2371                case XC( OC_MATCH ):
2372                        op1 = op->r.n;
2373 re_cont:
2374                        X.re = as_regex(op1, &sreg);
2375                        R.i = regexec(X.re, L.s, 0, NULL, 0);
2376                        if (X.re == &sreg) regfree(X.re);
2377                        setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2378                        break;
2379
2380                case XC( OC_MOVE ):
2381                        /* if source is a temporary string, jusk relink it to dest */
2382                        if (R.v == v1+1 && R.v->string) {
2383                                res = setvar_p(L.v, R.v->string);
2384                                R.v->string = NULL;
2385                        } else {
2386                                res = copyvar(L.v, R.v);
2387                        }
2388                        break;
2389
2390                case XC( OC_TERNARY ):
2391                        if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2392                                syntax_error(EMSG_POSSIBLE_ERROR);
2393                        res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2394                        break;
2395
2396                case XC( OC_FUNC ):
2397                        if (!op->r.f->body.first)
2398                                syntax_error(EMSG_UNDEF_FUNC);
2399
2400                        X.v = R.v = nvalloc(op->r.f->nargs+1);
2401                        while (op1) {
2402                                L.v = evaluate(nextarg(&op1), v1);
2403                                copyvar(R.v, L.v);
2404                                R.v->type |= VF_CHILD;
2405                                R.v->x.parent = L.v;
2406                                if (++R.v - X.v >= op->r.f->nargs)
2407                                        break;
2408                        }
2409
2410                        R.v = fnargs;
2411                        fnargs = X.v;
2412
2413                        L.s = g_progname;
2414                        res = evaluate(op->r.f->body.first, res);
2415                        g_progname = L.s;
2416
2417                        nvfree(fnargs);
2418                        fnargs = R.v;
2419                        break;
2420
2421                case XC( OC_GETLINE ):
2422                case XC( OC_PGETLINE ):
2423                        if (op1) {
2424                                X.rsm = newfile(L.s);
2425                                if (!X.rsm->F) {
2426                                        if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2427                                                X.rsm->F = popen(L.s, "r");
2428                                                X.rsm->is_pipe = TRUE;
2429                                        } else {
2430                                                X.rsm->F = fopen_for_read(L.s);         /* not xfopen! */
2431                                        }
2432                                }
2433                        } else {
2434                                if (!iF) iF = next_input_file();
2435                                X.rsm = iF;
2436                        }
2437
2438                        if (!X.rsm->F) {
2439                                setvar_i(intvar[ERRNO], errno);
2440                                setvar_i(res, -1);
2441                                break;
2442                        }
2443
2444                        if (!op->r.n)
2445                                R.v = intvar[F0];
2446
2447                        L.i = awk_getline(X.rsm, R.v);
2448                        if (L.i > 0) {
2449                                if (!op1) {
2450                                        incvar(intvar[FNR]);
2451                                        incvar(intvar[NR]);
2452                                }
2453                        }
2454                        setvar_i(res, L.i);
2455                        break;
2456
2457                /* simple builtins */
2458                case XC( OC_FBLTIN ):
2459                        switch (opn) {
2460
2461                        case F_in:
2462                                R.d = (int)L.d;
2463                                break;
2464
2465                        case F_rn:
2466                                R.d = (double)rand() / (double)RAND_MAX;
2467                                break;
2468#if ENABLE_FEATURE_AWK_LIBM
2469                        case F_co:
2470                                R.d = cos(L.d);
2471                                break;
2472
2473                        case F_ex:
2474                                R.d = exp(L.d);
2475                                break;
2476
2477                        case F_lg:
2478                                R.d = log(L.d);
2479                                break;
2480
2481                        case F_si:
2482                                R.d = sin(L.d);
2483                                break;
2484
2485                        case F_sq:
2486                                R.d = sqrt(L.d);
2487                                break;
2488#else
2489                        case F_co:
2490                        case F_ex:
2491                        case F_lg:
2492                        case F_si:
2493                        case F_sq:
2494                                syntax_error(EMSG_NO_MATH);
2495                                break;
2496#endif
2497                        case F_sr:
2498                                R.d = (double)seed;
2499                                seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2500                                srand(seed);
2501                                break;
2502
2503                        case F_ti:
2504                                R.d = time(NULL);
2505                                break;
2506
2507                        case F_le:
2508                                if (!op1)
2509                                        L.s = getvar_s(intvar[F0]);
2510                                R.d = strlen(L.s);
2511                                break;
2512
2513                        case F_sy:
2514                                fflush(NULL);
2515                                R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2516                                                ? (system(L.s) >> 8) : 0;
2517                                break;
2518
2519                        case F_ff:
2520                                if (!op1)
2521                                        fflush(stdout);
2522                                else {
2523                                        if (L.s && *L.s) {
2524                                                X.rsm = newfile(L.s);
2525                                                fflush(X.rsm->F);
2526                                        } else {
2527                                                fflush(NULL);
2528                                        }
2529                                }
2530                                break;
2531
2532                        case F_cl:
2533                                X.rsm = (rstream *)hash_search(fdhash, L.s);
2534                                if (X.rsm) {
2535                                        R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2536                                        free(X.rsm->buffer);
2537                                        hash_remove(fdhash, L.s);
2538                                }
2539                                if (R.i != 0)
2540                                        setvar_i(intvar[ERRNO], errno);
2541                                R.d = (double)R.i;
2542                                break;
2543                        }
2544                        setvar_i(res, R.d);
2545                        break;
2546
2547                case XC( OC_BUILTIN ):
2548                        res = exec_builtin(op, res);
2549                        break;
2550
2551                case XC( OC_SPRINTF ):
2552                        setvar_p(res, awk_printf(op1));
2553                        break;
2554
2555                case XC( OC_UNARY ):
2556                        X.v = R.v;
2557                        L.d = R.d = getvar_i(R.v);
2558                        switch (opn) {
2559                        case 'P':
2560                                L.d = ++R.d;
2561                                goto r_op_change;
2562                        case 'p':
2563                                R.d++;
2564                                goto r_op_change;
2565                        case 'M':
2566                                L.d = --R.d;
2567                                goto r_op_change;
2568                        case 'm':
2569                                R.d--;
2570                                goto r_op_change;
2571                        case '!':
2572                                L.d = istrue(X.v) ? 0 : 1;
2573                                break;
2574                        case '-':
2575                                L.d = -R.d;
2576                                break;
2577 r_op_change:
2578                                setvar_i(X.v, R.d);
2579                        }
2580                        setvar_i(res, L.d);
2581                        break;
2582
2583                case XC( OC_FIELD ):
2584                        R.i = (int)getvar_i(R.v);
2585                        if (R.i == 0) {
2586                                res = intvar[F0];
2587                        } else {
2588                                split_f0();
2589                                if (R.i > nfields)
2590                                        fsrealloc(R.i);
2591                                res = &Fields[R.i - 1];
2592                        }
2593                        break;
2594
2595                /* concatenation (" ") and index joining (",") */
2596                case XC( OC_CONCAT ):
2597                case XC( OC_COMMA ):
2598                        opn = strlen(L.s) + strlen(R.s) + 2;
2599                        X.s = xmalloc(opn);
2600                        strcpy(X.s, L.s);
2601                        if ((opinfo & OPCLSMASK) == OC_COMMA) {
2602                                L.s = getvar_s(intvar[SUBSEP]);
2603                                X.s = xrealloc(X.s, opn + strlen(L.s));
2604                                strcat(X.s, L.s);
2605                        }
2606                        strcat(X.s, R.s);
2607                        setvar_p(res, X.s);
2608                        break;
2609
2610                case XC( OC_LAND ):
2611                        setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2612                        break;
2613
2614                case XC( OC_LOR ):
2615                        setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2616                        break;
2617
2618                case XC( OC_BINARY ):
2619                case XC( OC_REPLACE ):
2620                        R.d = getvar_i(R.v);
2621                        switch (opn) {
2622                        case '+':
2623                                L.d += R.d;
2624                                break;
2625                        case '-':
2626                                L.d -= R.d;
2627                                break;
2628                        case '*':
2629                                L.d *= R.d;
2630                                break;
2631                        case '/':
2632                                if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2633                                L.d /= R.d;
2634                                break;
2635                        case '&':
2636#if ENABLE_FEATURE_AWK_LIBM
2637                                L.d = pow(L.d, R.d);
2638#else
2639                                syntax_error(EMSG_NO_MATH);
2640#endif
2641                                break;
2642                        case '%':
2643                                if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2644                                L.d -= (int)(L.d / R.d) * R.d;
2645                                break;
2646                        }
2647                        res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2648                        break;
2649
2650                case XC( OC_COMPARE ):
2651                        if (is_numeric(L.v) && is_numeric(R.v)) {
2652                                L.d = getvar_i(L.v) - getvar_i(R.v);
2653                        } else {
2654                                L.s = getvar_s(L.v);
2655                                R.s = getvar_s(R.v);
2656                                L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2657                        }
2658                        switch (opn & 0xfe) {
2659                        case 0:
2660                                R.i = (L.d > 0);
2661                                break;
2662                        case 2:
2663                                R.i = (L.d >= 0);
2664                                break;
2665                        case 4:
2666                                R.i = (L.d == 0);
2667                                break;
2668                        }
2669                        setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2670                        break;
2671
2672                default:
2673                        syntax_error(EMSG_POSSIBLE_ERROR);
2674                }
2675                if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2676                        op = op->a.n;
2677                if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2678                        break;
2679                if (nextrec)
2680                        break;
2681        }
2682        nvfree(v1);
2683        return res;
2684#undef fnargs
2685#undef seed
2686#undef sreg
2687}
2688
2689
2690/* -------- main & co. -------- */
2691
2692static int awk_exit(int r)
2693{
2694        var tv;
2695        unsigned i;
2696        hash_item *hi;
2697
2698        zero_out_var(&tv);
2699
2700        if (!exiting) {
2701                exiting = TRUE;
2702                nextrec = FALSE;
2703                evaluate(endseq.first, &tv);
2704        }
2705
2706        /* waiting for children */
2707        for (i = 0; i < fdhash->csize; i++) {
2708                hi = fdhash->items[i];
2709                while (hi) {
2710                        if (hi->data.rs.F && hi->data.rs.is_pipe)
2711                                pclose(hi->data.rs.F);
2712                        hi = hi->next;
2713                }
2714        }
2715
2716        exit(r);
2717}
2718
2719/* if expr looks like "var=value", perform assignment and return 1,
2720 * otherwise return 0 */
2721static int is_assignment(const char *expr)
2722{
2723        char *exprc, *s, *s0, *s1;
2724
2725        exprc = xstrdup(expr);
2726        if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2727                free(exprc);
2728                return FALSE;
2729        }
2730
2731        *(s++) = '\0';
2732        s0 = s1 = s;
2733        while (*s)
2734                *(s1++) = nextchar(&s);
2735
2736        *s1 = '\0';
2737        setvar_u(newvar(exprc), s0);
2738        free(exprc);
2739        return TRUE;
2740}
2741
2742/* switch to next input file */
2743static rstream *next_input_file(void)
2744{
2745#define rsm          (G.next_input_file__rsm)
2746#define files_happen (G.next_input_file__files_happen)
2747
2748        FILE *F = NULL;
2749        const char *fname, *ind;
2750
2751        if (rsm.F) fclose(rsm.F);
2752        rsm.F = NULL;
2753        rsm.pos = rsm.adv = 0;
2754
2755        do {
2756                if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2757                        if (files_happen)
2758                                return NULL;
2759                        fname = "-";
2760                        F = stdin;
2761                } else {
2762                        ind = getvar_s(incvar(intvar[ARGIND]));
2763                        fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2764                        if (fname && *fname && !is_assignment(fname))
2765                                F = xfopen_stdin(fname);
2766                }
2767        } while (!F);
2768
2769        files_happen = TRUE;
2770        setvar_s(intvar[FILENAME], fname);
2771        rsm.F = F;
2772        return &rsm;
2773#undef rsm
2774#undef files_happen
2775}
2776
2777int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2778int awk_main(int argc, char **argv)
2779{
2780        unsigned opt;
2781        char *opt_F, *opt_W;
2782        llist_t *list_v = NULL;
2783        llist_t *list_f = NULL;
2784        int i, j;
2785        var *v;
2786        var tv;
2787        char **envp;
2788        char *vnames = (char *)vNames; /* cheat */
2789        char *vvalues = (char *)vValues;
2790
2791        INIT_G();
2792
2793        /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2794         * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2795        if (ENABLE_LOCALE_SUPPORT)
2796                setlocale(LC_NUMERIC, "C");
2797
2798        zero_out_var(&tv);
2799
2800        /* allocate global buffer */
2801        g_buf = xmalloc(MAXVARFMT + 1);
2802
2803        vhash = hash_init();
2804        ahash = hash_init();
2805        fdhash = hash_init();
2806        fnhash = hash_init();
2807
2808        /* initialize variables */
2809        for (i = 0; *vnames; i++) {
2810                intvar[i] = v = newvar(nextword(&vnames));
2811                if (*vvalues != '\377')
2812                        setvar_s(v, nextword(&vvalues));
2813                else
2814                        setvar_i(v, 0);
2815
2816                if (*vnames == '*') {
2817                        v->type |= VF_SPECIAL;
2818                        vnames++;
2819                }
2820        }
2821
2822        handle_special(intvar[FS]);
2823        handle_special(intvar[RS]);
2824
2825        newfile("/dev/stdin")->F = stdin;
2826        newfile("/dev/stdout")->F = stdout;
2827        newfile("/dev/stderr")->F = stderr;
2828
2829        /* Huh, people report that sometimes environ is NULL. Oh well. */
2830        if (environ) for (envp = environ; *envp; envp++) {
2831                /* environ is writable, thus we don't strdup it needlessly */
2832                char *s = *envp;
2833                char *s1 = strchr(s, '=');
2834                if (s1) {
2835                        *s1 = '\0';
2836                        /* Both findvar and setvar_u take const char*
2837                         * as 2nd arg -> environment is not trashed */
2838                        setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2839                        *s1 = '=';
2840                }
2841        }
2842        opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2843        opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2844        argv += optind;
2845        argc -= optind;
2846        if (opt & 0x1)
2847                setvar_s(intvar[FS], opt_F); // -F
2848        while (list_v) { /* -v */
2849                if (!is_assignment(llist_pop(&list_v)))
2850                        bb_show_usage();
2851        }
2852        if (list_f) { /* -f */
2853                do {
2854                        char *s = NULL;
2855                        FILE *from_file;
2856
2857                        g_progname = llist_pop(&list_f);
2858                        from_file = xfopen_stdin(g_progname);
2859                        /* one byte is reserved for some trick in next_token */
2860                        for (i = j = 1; j > 0; i += j) {
2861                                s = xrealloc(s, i + 4096);
2862                                j = fread(s + i, 1, 4094, from_file);
2863                        }
2864                        s[i] = '\0';
2865                        fclose(from_file);
2866                        parse_program(s + 1);
2867                        free(s);
2868                } while (list_f);
2869        } else { // no -f: take program from 1st parameter
2870                if (!argc)
2871                        bb_show_usage();
2872                g_progname = "cmd. line";
2873                parse_program(*argv++);
2874                argc--;
2875        }
2876        if (opt & 0x8) // -W
2877                bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2878
2879        /* fill in ARGV array */
2880        setvar_i(intvar[ARGC], argc + 1);
2881        setari_u(intvar[ARGV], 0, "awk");
2882        i = 0;
2883        while (*argv)
2884                setari_u(intvar[ARGV], ++i, *argv++);
2885
2886        evaluate(beginseq.first, &tv);
2887        if (!mainseq.first && !endseq.first)
2888                awk_exit(EXIT_SUCCESS);
2889
2890        /* input file could already be opened in BEGIN block */
2891        if (!iF) iF = next_input_file();
2892
2893        /* passing through input files */
2894        while (iF) {
2895                nextfile = FALSE;
2896                setvar_i(intvar[FNR], 0);
2897
2898                while ((i = awk_getline(iF, intvar[F0])) > 0) {
2899                        nextrec = FALSE;
2900                        incvar(intvar[NR]);
2901                        incvar(intvar[FNR]);
2902                        evaluate(mainseq.first, &tv);
2903
2904                        if (nextfile)
2905                                break;
2906                }
2907
2908                if (i < 0)
2909                        syntax_error(strerror(errno));
2910
2911                iF = next_input_file();
2912        }
2913
2914        awk_exit(EXIT_SUCCESS);
2915        /*return 0;*/
2916}
2917