busybox/editors/awk.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * awk implementation for busybox
   4 *
   5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
   6 *
   7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
   8 */
   9//config:config AWK
  10//config:       bool "awk (23 kb)"
  11//config:       default y
  12//config:       help
  13//config:       Awk is used as a pattern scanning and processing language.
  14//config:
  15//config:config FEATURE_AWK_LIBM
  16//config:       bool "Enable math functions (requires libm)"
  17//config:       default y
  18//config:       depends on AWK
  19//config:       help
  20//config:       Enable math functions of the Awk programming language.
  21//config:       NOTE: This requires libm to be present for linking.
  22//config:
  23//config:config FEATURE_AWK_GNU_EXTENSIONS
  24//config:       bool "Enable a few GNU extensions"
  25//config:       default y
  26//config:       depends on AWK
  27//config:       help
  28//config:       Enable a few features from gawk:
  29//config:       * command line option -e AWK_PROGRAM
  30//config:       * simultaneous use of -f and -e on the command line.
  31//config:       This enables the use of awk library files.
  32//config:       Example: awk -f mylib.awk -e '{print myfunction($1);}' ...
  33
  34//applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk))
  35
  36//kbuild:lib-$(CONFIG_AWK) += awk.o
  37
  38//usage:#define awk_trivial_usage
  39//usage:       "[OPTIONS] [AWK_PROGRAM] [FILE]..."
  40//usage:#define awk_full_usage "\n\n"
  41//usage:       "        -v VAR=VAL      Set variable"
  42//usage:     "\n        -F SEP          Use SEP as field separator"
  43//usage:     "\n        -f FILE         Read program from FILE"
  44//usage:        IF_FEATURE_AWK_GNU_EXTENSIONS(
  45//usage:     "\n        -e AWK_PROGRAM"
  46//usage:        )
  47
  48#include "libbb.h"
  49#include "xregex.h"
  50#include <math.h>
  51
  52/* This is a NOEXEC applet. Be very careful! */
  53
  54
  55/* If you comment out one of these below, it will be #defined later
  56 * to perform debug printfs to stderr: */
  57#define debug_printf_walker(...)  do {} while (0)
  58#define debug_printf_eval(...)  do {} while (0)
  59#define debug_printf_parse(...)  do {} while (0)
  60
  61#ifndef debug_printf_walker
  62# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
  63#endif
  64#ifndef debug_printf_eval
  65# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
  66#endif
  67#ifndef debug_printf_parse
  68# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
  69#else
  70# define debug_parse_print_tc(...) ((void)0)
  71#endif
  72
  73
  74/* "+": stop on first non-option:
  75 * $ awk 'BEGIN { for(i=1; i<ARGC; ++i) { print i ": " ARGV[i] }}' -argz
  76 * 1: -argz
  77 */
  78#define OPTSTR_AWK "+" \
  79        "F:v:*f:*" \
  80        IF_FEATURE_AWK_GNU_EXTENSIONS("e:*") \
  81        "W:"
  82enum {
  83        OPTBIT_F,       /* define field separator */
  84        OPTBIT_v,       /* define variable */
  85        OPTBIT_f,       /* pull in awk program from file */
  86        IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,) /* -e AWK_PROGRAM */
  87        OPTBIT_W,       /* -W ignored */
  88        OPT_F = 1 << OPTBIT_F,
  89        OPT_v = 1 << OPTBIT_v,
  90        OPT_f = 1 << OPTBIT_f,
  91        OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
  92        OPT_W = 1 << OPTBIT_W
  93};
  94
  95#define MAXVARFMT       240
  96
  97/* variable flags */
  98#define VF_NUMBER       0x0001  /* 1 = primary type is number */
  99#define VF_ARRAY        0x0002  /* 1 = it's an array */
 100
 101#define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
 102#define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
 103#define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
 104#define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
 105#define VF_FSTR         0x1000  /* 1 = don't free() var::string (not malloced, or is owned by something else) */
 106#define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
 107#define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
 108
 109/* these flags are static, don't change them when value is changed */
 110#define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
 111
 /* One entry of the list hung off var.x.walker (array elements for for..in).
  * NOTE(review): wbuf[1] looks like an over-allocated trailing buffer with
  * cur/end pointing into it, and prev linking to an outer list entry -
  * confirm against the for..in walker code.
  */
 typedef struct walker_list {
         char *end, *cur;
         struct walker_list *prev;
         char wbuf[1];
 } walker_list;
 118
 119/* Variable */
 120typedef struct var_s {
 121        unsigned type;            /* flags */
 122        char *string;
 123        double number;
 124        union {
 125                int aidx;               /* func arg idx (for compilation stage) */
 126                struct xhash_s *array;  /* array ptr */
 127                struct var_s *parent;   /* for func args, ptr to actual parameter */
 128                walker_list *walker;    /* list of array elements (for..in) */
 129        } x;
 130} var;
 131
 132/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
 /* Node chain: used for the pattern-action chain, BEGIN, END
  * and function bodies. Tracks the first and last node of the chain
  * plus a program name string.
  */
 typedef struct chain_s {
         struct node_s *first;
         struct node_s *last;
         const char *programname;
 } chain;
 138
 139/* Function */
 140typedef struct func_s {
 141        unsigned nargs;
 142        smallint defined;
 143        struct chain_s body;
 144} func;
 145
 146/* I/O stream */
 147typedef struct rstream_s {
 148        FILE *F;
 149        char *buffer;
 150        int adv;
 151        int size;
 152        int pos;
 153        smallint is_pipe;
 154} rstream;
 155
 156typedef struct hash_item_s {
 157        union {
 158                struct var_s v;         /* variable/array hash */
 159                struct rstream_s rs;    /* redirect streams hash */
 160                struct func_s f;        /* functions hash */
 161        } data;
 162        struct hash_item_s *next;       /* next in chain */
 163        char name[1];                   /* really it's longer */
 164} hash_item;
 165
 /* Chained hash table header */
 typedef struct xhash_s {
         /* number of elements */
         unsigned nel;
         /* current number of buckets in items[] */
         unsigned csize;
         /* index into PRIMES[] of the next size to grow to */
         unsigned nprime;
         /* total length of all item names, each counted with its NUL */
         unsigned glen;
         /* bucket array: heads of the per-bucket chains */
         struct hash_item_s **items;
 } xhash;
 173
 174/* Tree node */
 175typedef struct node_s {
 176        uint32_t info;
 177        unsigned lineno;
 178        union {
 179                struct node_s *n;
 180                var *v;
 181                int aidx;
 182                const char *new_progname;
 183                regex_t *re;
 184        } l;
 185        union {
 186                struct node_s *n;
 187                regex_t *ire;
 188                func *f;
 189        } r;
 190        union {
 191                struct node_s *n;
 192        } a;
 193} node;
 194
 195typedef struct tsplitter_s {
 196        node n;
 197        regex_t re[2];
 198} tsplitter;
 199
 200/* simple token classes */
 201/* order and hex values are very important!!!  See next_token() */
 202#define TC_LPAREN       (1 << 0)        /* ( */
 203#define TC_RPAREN       (1 << 1)        /* ) */
 204#define TC_REGEXP       (1 << 2)        /* /.../ */
 205#define TC_OUTRDR       (1 << 3)        /* | > >> */
 206#define TC_UOPPOST      (1 << 4)        /* unary postfix operator ++ -- */
 207#define TC_UOPPRE1      (1 << 5)        /* unary prefix operator ++ -- $ */
 208#define TC_BINOPX       (1 << 6)        /* two-opnd operator */
 209#define TC_IN           (1 << 7)        /* 'in' */
 210#define TC_COMMA        (1 << 8)        /* , */
 211#define TC_PIPE         (1 << 9)        /* input redirection pipe | */
 212#define TC_UOPPRE2      (1 << 10)       /* unary prefix operator + - ! */
 213#define TC_ARRTERM      (1 << 11)       /* ] */
 214#define TC_LBRACE       (1 << 12)       /* { */
 215#define TC_RBRACE       (1 << 13)       /* } */
 216#define TC_SEMICOL      (1 << 14)       /* ; */
 217#define TC_NEWLINE      (1 << 15)
 218#define TC_STATX        (1 << 16)       /* ctl statement (for, next...) */
 219#define TC_WHILE        (1 << 17)       /* 'while' */
 220#define TC_ELSE         (1 << 18)       /* 'else' */
 221#define TC_BUILTIN      (1 << 19)
 222/* This costs ~50 bytes of code.
 223 * A separate class to support deprecated "length" form. If we don't need that
 224 * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
 225 * can be merged with TC_BUILTIN:
 226 */
 227#define TC_LENGTH       (1 << 20)       /* 'length' */
 228#define TC_GETLINE      (1 << 21)       /* 'getline' */
 229#define TC_FUNCDECL     (1 << 22)       /* 'function' 'func' */
 230#define TC_BEGIN        (1 << 23)       /* 'BEGIN' */
 231#define TC_END          (1 << 24)       /* 'END' */
 232#define TC_EOF          (1 << 25)
 233#define TC_VARIABLE     (1 << 26)       /* name */
 234#define TC_ARRAY        (1 << 27)       /* name[ */
 235#define TC_FUNCTION     (1 << 28)       /* name( */
 236#define TC_STRING       (1 << 29)       /* "..." */
 237#define TC_NUMBER       (1 << 30)
 238
 239#ifndef debug_parse_print_tc
 240static void debug_parse_print_tc(uint32_t n)
 241{
 242        if (n & TC_LPAREN  ) debug_printf_parse(" LPAREN"  );
 243        if (n & TC_RPAREN  ) debug_printf_parse(" RPAREN"  );
 244        if (n & TC_REGEXP  ) debug_printf_parse(" REGEXP"  );
 245        if (n & TC_OUTRDR  ) debug_printf_parse(" OUTRDR"  );
 246        if (n & TC_UOPPOST ) debug_printf_parse(" UOPPOST" );
 247        if (n & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" );
 248        if (n & TC_BINOPX  ) debug_printf_parse(" BINOPX"  );
 249        if (n & TC_IN      ) debug_printf_parse(" IN"      );
 250        if (n & TC_COMMA   ) debug_printf_parse(" COMMA"   );
 251        if (n & TC_PIPE    ) debug_printf_parse(" PIPE"    );
 252        if (n & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" );
 253        if (n & TC_ARRTERM ) debug_printf_parse(" ARRTERM" );
 254        if (n & TC_LBRACE  ) debug_printf_parse(" LBRACE"  );
 255        if (n & TC_RBRACE  ) debug_printf_parse(" RBRACE"  );
 256        if (n & TC_SEMICOL ) debug_printf_parse(" SEMICOL" );
 257        if (n & TC_NEWLINE ) debug_printf_parse(" NEWLINE" );
 258        if (n & TC_STATX   ) debug_printf_parse(" STATX"   );
 259        if (n & TC_WHILE   ) debug_printf_parse(" WHILE"   );
 260        if (n & TC_ELSE    ) debug_printf_parse(" ELSE"    );
 261        if (n & TC_BUILTIN ) debug_printf_parse(" BUILTIN" );
 262        if (n & TC_LENGTH  ) debug_printf_parse(" LENGTH"  );
 263        if (n & TC_GETLINE ) debug_printf_parse(" GETLINE" );
 264        if (n & TC_FUNCDECL) debug_printf_parse(" FUNCDECL");
 265        if (n & TC_BEGIN   ) debug_printf_parse(" BEGIN"   );
 266        if (n & TC_END     ) debug_printf_parse(" END"     );
 267        if (n & TC_EOF     ) debug_printf_parse(" EOF"     );
 268        if (n & TC_VARIABLE) debug_printf_parse(" VARIABLE");
 269        if (n & TC_ARRAY   ) debug_printf_parse(" ARRAY"   );
 270        if (n & TC_FUNCTION) debug_printf_parse(" FUNCTION");
 271        if (n & TC_STRING  ) debug_printf_parse(" STRING"  );
 272        if (n & TC_NUMBER  ) debug_printf_parse(" NUMBER"  );
 273}
 274#endif
 275
 276/* combined token classes ("token [class] sets") */
 277#define TS_UOPPRE   (TC_UOPPRE1 | TC_UOPPRE2)
 278
 279#define TS_BINOP    (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
 280//#define TS_UNARYOP (TS_UOPPRE | TC_UOPPOST)
 281#define TS_OPERAND  (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
 282                    | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
 283                    | TC_LPAREN | TC_STRING | TC_NUMBER)
 284
 285#define TS_LVALUE   (TC_VARIABLE | TC_ARRAY)
 286#define TS_STATEMNT (TC_STATX | TC_WHILE)
 287
 288/* word tokens, cannot mean something else if not expected */
 289#define TS_WORD     (TC_IN | TS_STATEMNT | TC_ELSE \
 290                    | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
 291                    | TC_FUNCDECL | TC_BEGIN | TC_END)
 292
 293/* discard newlines after these */
 294#define TS_NOTERM   (TS_BINOP | TC_COMMA | TC_LBRACE | TC_RBRACE \
 295                    | TC_SEMICOL | TC_NEWLINE)
 296
 297/* what can expression begin with */
 298#define TS_OPSEQ    (TS_OPERAND | TS_UOPPRE | TC_REGEXP)
 299/* what can group begin with */
 300#define TS_GRPSEQ   (TS_OPSEQ | TS_STATEMNT \
 301                    | TC_SEMICOL | TC_NEWLINE | TC_LBRACE)
 302
 303/* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */
 304/* operator is inserted between them */
 305#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_RPAREN \
 306                   | TC_STRING | TC_NUMBER | TC_UOPPOST \
 307                   | TC_LENGTH)
 308#define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE)
 309
 310#define OF_RES1     0x010000
 311#define OF_RES2     0x020000
 312#define OF_STR1     0x040000
 313#define OF_STR2     0x080000
 314#define OF_NUM1     0x100000
 315#define OF_CHECKED  0x200000
 316#define OF_REQUIRED 0x400000
 317
 318/* combined operator flags */
 319#define xx      0
 320#define xV      OF_RES2
 321#define xS      (OF_RES2 | OF_STR2)
 322#define Vx      OF_RES1
 323#define Rx      OF_REQUIRED
 324#define VV      (OF_RES1 | OF_RES2)
 325#define Nx      (OF_RES1 | OF_NUM1)
 326#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
 327#define Sx      (OF_RES1 | OF_STR1)
 328#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
 329#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
 330
 331#define OPCLSMASK 0xFF00
 332#define OPNMASK   0x007F
 333
 /* Operator priority is stored in the highest byte
  * (even: r->l grouping, odd: l->r grouping).
  * For builtins the highest byte has a different meaning.
  */
 337#undef P
 338#undef PRIMASK
 339#undef PRIMASK2
 340#define P(x)      (x << 24)
 341#define PRIMASK   0x7F000000
 342#define PRIMASK2  0x7E000000
 343
 344/* Operation classes */
 345#define SHIFT_TIL_THIS  0x0600
 346#define RECUR_FROM_THIS 0x1000
 347enum {
 348        OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
 349        OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
 350
 351        OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
 352        OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
 353        OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
 354
 355        OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
 356        OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
 357        OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
 358        OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
 359        OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
 360        OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
 361        OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
 362        OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
 363        OC_DONE = 0x2800,
 364
 365        ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
 366        ST_WHILE = 0x3300
 367};
 368
 369/* simple builtins */
 370enum {
 371        F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
 372        F_ti,   F_le,   F_sy,   F_ff,   F_cl
 373};
 374
 375/* builtins */
 376enum {
 377        B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
 378        B_ge,   B_gs,   B_su,
 379        B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
 380};
 381
 382/* tokens and their corresponding info values */
 383
 384#define NTC     "\377"  /* switch to next token class (tc<<1) */
 385#define NTCC    '\377'
 386
 387static const char tokenlist[] ALIGN1 =
 388        "\1("         NTC                                   /* TC_LPAREN */
 389        "\1)"         NTC                                   /* TC_RPAREN */
 390        "\1/"         NTC                                   /* TC_REGEXP */
 391        "\2>>"        "\1>"         "\1|"       NTC         /* TC_OUTRDR */
 392        "\2++"        "\2--"        NTC                     /* TC_UOPPOST */
 393        "\2++"        "\2--"        "\1$"       NTC         /* TC_UOPPRE1 */
 394        "\2=="        "\1="         "\2+="      "\2-="      /* TC_BINOPX */
 395        "\2*="        "\2/="        "\2%="      "\2^="
 396        "\1+"         "\1-"         "\3**="     "\2**"
 397        "\1/"         "\1%"         "\1^"       "\1*"
 398        "\2!="        "\2>="        "\2<="      "\1>"
 399        "\1<"         "\2!~"        "\1~"       "\2&&"
 400        "\2||"        "\1?"         "\1:"       NTC
 401        "\2in"        NTC                                   /* TC_IN */
 402        "\1,"         NTC                                   /* TC_COMMA */
 403        "\1|"         NTC                                   /* TC_PIPE */
 404        "\1+"         "\1-"         "\1!"       NTC         /* TC_UOPPRE2 */
 405        "\1]"         NTC                                   /* TC_ARRTERM */
 406        "\1{"         NTC                                   /* TC_LBRACE */
 407        "\1}"         NTC                                   /* TC_RBRACE */
 408        "\1;"         NTC                                   /* TC_SEMICOL */
 409        "\1\n"        NTC                                   /* TC_NEWLINE */
 410        "\2if"        "\2do"        "\3for"     "\5break"   /* TC_STATX */
 411        "\10continue" "\6delete"    "\5print"
 412        "\6printf"    "\4next"      "\10nextfile"
 413        "\6return"    "\4exit"      NTC
 414        "\5while"     NTC                                   /* TC_WHILE */
 415        "\4else"      NTC                                   /* TC_ELSE */
 416        "\3and"       "\5compl"     "\6lshift"  "\2or"      /* TC_BUILTIN */
 417        "\6rshift"    "\3xor"
 418        "\5close"     "\6system"    "\6fflush"  "\5atan2"
 419        "\3cos"       "\3exp"       "\3int"     "\3log"
 420        "\4rand"      "\3sin"       "\4sqrt"    "\5srand"
 421        "\6gensub"    "\4gsub"      "\5index"   /* "\6length" was here */
 422        "\5match"     "\5split"     "\7sprintf" "\3sub"
 423        "\6substr"    "\7systime"   "\10strftime" "\6mktime"
 424        "\7tolower"   "\7toupper"   NTC
 425        "\6length"    NTC                                   /* TC_LENGTH */
 426        "\7getline"   NTC                                   /* TC_GETLINE */
 427        "\4func"      "\10function" NTC                     /* TC_FUNCDECL */
 428        "\5BEGIN"     NTC                                   /* TC_BEGIN */
 429        "\3END"                                             /* TC_END */
 430        /* compiler adds trailing "\0" */
 431        ;
 432
 433static const uint32_t tokeninfo[] ALIGN4 = {
 434        0,
 435        0,
 436#define TI_REGEXP OC_REGEXP
 437        TI_REGEXP,
 438        xS|'a',                  xS|'w',                  xS|'|',
 439        OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
 440#define TI_PREINC (OC_UNARY|xV|P(9)|'P')
 441#define TI_PREDEC (OC_UNARY|xV|P(9)|'M')
 442        TI_PREINC,               TI_PREDEC,               OC_FIELD|xV|P(5),
 443        OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
 444        OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
 445        OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
 446        OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
 447        OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
 448#define TI_LESS     (OC_COMPARE|VV|P(39)|2)
 449        TI_LESS,                 OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
 450#define TI_TERNARY  (OC_TERNARY|Vx|P(64)|'?')
 451#define TI_COLON    (OC_COLON|xx|P(67)|':')
 452        OC_LOR|Vx|P(59),         TI_TERNARY,              TI_COLON,
 453#define TI_IN       (OC_IN|SV|P(49))
 454        TI_IN,
 455#define TI_COMMA    (OC_COMMA|SS|P(80))
 456        TI_COMMA,
 457#define TI_PGETLINE (OC_PGETLINE|SV|P(37))
 458        TI_PGETLINE,
 459        OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
 460        0, /* ] */
 461        0,
 462        0,
 463        0,
 464        0, /* \n */
 465        ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
 466        OC_CONTINUE,  OC_DELETE|Rx, OC_PRINT,
 467        OC_PRINTF,    OC_NEXT,      OC_NEXTFILE,
 468        OC_RETURN|Vx, OC_EXIT|Nx,
 469        ST_WHILE,
 470        0, /* else */
 471// OC_B's are builtins with enforced minimum number of arguments (two upper bits).
 472//  Highest byte bit pattern: nn s3s2s1 v3v2v1
 473//  nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var
 474// OC_F's are builtins with zero or one argument.
 475//  |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt
 476//  Check for no args is present in builtins' code (not in this table): rand, systime
 477//  Have one _optional_ arg: fflush, srand, length
 478#define OC_B   OC_BUILTIN
 479#define OC_F   OC_FBLTIN
 480#define A1     P(0x40) /*one arg*/
 481#define A2     P(0x80) /*two args*/
 482#define A3     P(0xc0) /*three args*/
 483#define __v    P(1)
 484#define _vv    P(3)
 485#define __s__v P(9)
 486#define __s_vv P(0x0b)
 487#define __svvv P(0x0f)
 488#define _ss_vv P(0x1b)
 489#define _s_vv_ P(0x16)
 490#define ss_vv_ P(0x36)
 491        OC_B|B_an|_vv|A2,   OC_B|B_co|__v|A1,   OC_B|B_ls|_vv|A2,   OC_B|B_or|_vv|A2,   // and    compl   lshift   or
 492        OC_B|B_rs|_vv|A2,   OC_B|B_xo|_vv|A2,                                           // rshift xor
 493        OC_F|F_cl|Sx|Rx,    OC_F|F_sy|Sx|Rx,    OC_F|F_ff|Sx,       OC_B|B_a2|_vv|A2,   // close  system  fflush   atan2
 494        OC_F|F_co|Nx|Rx,    OC_F|F_ex|Nx|Rx,    OC_F|F_in|Nx|Rx,    OC_F|F_lg|Nx|Rx,    // cos    exp     int      log
 495        OC_F|F_rn,          OC_F|F_si|Nx|Rx,    OC_F|F_sq|Nx|Rx,    OC_F|F_sr|Nx,       // rand   sin     sqrt     srand
 496        OC_B|B_ge|_s_vv_|A3,OC_B|B_gs|ss_vv_|A2,OC_B|B_ix|_ss_vv|A2,                    // gensub gsub    index  /*length was here*/
 497        OC_B|B_ma|__s__v|A2,OC_B|B_sp|__s_vv|A2,OC_SPRINTF,         OC_B|B_su|ss_vv_|A2,// match  split   sprintf  sub
 498        OC_B|B_ss|__svvv|A2,OC_F|F_ti,          OC_B|B_ti|__s_vv,   OC_B|B_mt|__s_vv,   // substr systime strftime mktime
 499        OC_B|B_lo|__s__v|A1,OC_B|B_up|__s__v|A1,                                        // tolower toupper
 500        OC_F|F_le|Sx,   // length
 501        OC_GETLINE|SV,  // getline
 502        0, 0, // func function
 503        0, // BEGIN
 504        0  // END
 505#undef A1
 506#undef A2
 507#undef A3
 508#undef OC_B
 509#undef OC_F
 510};
 511
 512/* internal variable names and their initial values       */
 513/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
 514enum {
 515        CONVFMT,    OFMT,       FS,         OFS,
 516        ORS,        RS,         RT,         FILENAME,
 517        SUBSEP,     F0,         ARGIND,     ARGC,
 518        ARGV,       ERRNO,      FNR,        NR,
 519        NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
 520};
 521
 522static const char vNames[] ALIGN1 =
 523        "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
 524        "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
 525        "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
 526        "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
 527        "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
 528
 529static const char vValues[] ALIGN1 =
 530        "%.6g\0"    "%.6g\0"    " \0"       " \0"
 531        "\n\0"      "\n\0"      "\0"        "\0"
 532        "\034\0"    "\0"        "\377";
 533
 534/* hash size may grow to these values */
 535#define FIRST_PRIME 61
 536static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
 537
 538
 539/* Globals. Split in two parts so that first one is addressed
 540 * with (mostly short) negative offsets.
 541 * NB: it's unsafe to put members of type "double"
 542 * into globals2 (gcc may fail to align them).
 543 */
 544struct globals {
 545        double t_double;
 546        chain beginseq, mainseq, endseq;
 547        chain *seq;
 548        node *break_ptr, *continue_ptr;
 549        rstream *iF;
 550        xhash *ahash;  /* argument names, used only while parsing function bodies */
 551        xhash *fnhash; /* function names, used only in parsing stage */
 552        xhash *vhash;  /* variables and arrays */
 553        //xhash *fdhash; /* file objects, used only in execution stage */
 554        //we are reusing ahash as fdhash, via define (see later)
 555        const char *g_progname;
 556        int g_lineno;
 557        int nfields;
 558        int maxfields; /* used in fsrealloc() only */
 559        var *Fields;
 560        char *g_pos;
 561        char g_saved_ch;
 562        smallint icase;
 563        smallint exiting;
 564        smallint nextrec;
 565        smallint nextfile;
 566        smallint is_f0_split;
 567        smallint t_rollback;
 568
 569        /* former statics from various functions */
 570        smallint next_token__concat_inserted;
 571        uint32_t next_token__save_tclass;
 572        uint32_t next_token__save_info;
 573};
 574struct globals2 {
 575        uint32_t t_info; /* often used */
 576        uint32_t t_tclass;
 577        char *t_string;
 578        int t_lineno;
 579
 580        var *intvar[NUM_INTERNAL_VARS]; /* often used */
 581
 582        /* former statics from various functions */
 583        char *split_f0__fstrings;
 584
 585        rstream next_input_file__rsm;
 586        smallint next_input_file__files_happen;
 587
 588        smalluint exitcode;
 589
 590        unsigned evaluate__seed;
 591        var *evaluate__fnargs;
 592        regex_t evaluate__sreg;
 593
 594        var ptest__tmpvar;
 595        var awk_printf__tmpvar;
 596        var as_regex__tmpvar;
 597        var exit__tmpvar;
 598        var main__tmpvar;
 599
 600        tsplitter exec_builtin__tspl;
 601
 602        /* biggest and least used members go last */
 603        tsplitter fsplitter, rsplitter;
 604
 605        char g_buf[MAXVARFMT + 1];
 606};
 607#define G1 (ptr_to_globals[-1])
 608#define G (*(struct globals2 *)ptr_to_globals)
 609/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
 610//char G1size[sizeof(G1)]; // 0x70
 611//char Gsize[sizeof(G)]; // 0x2f8
 612/* Trying to keep most of members accessible with short offsets: */
 613//char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; // 0x7c
 614#define t_double     (G1.t_double    )
 615#define beginseq     (G1.beginseq    )
 616#define mainseq      (G1.mainseq     )
 617#define endseq       (G1.endseq      )
 618#define seq          (G1.seq         )
 619#define break_ptr    (G1.break_ptr   )
 620#define continue_ptr (G1.continue_ptr)
 621#define iF           (G1.iF          )
 622#define ahash        (G1.ahash       )
 623#define fnhash       (G1.fnhash      )
 624#define vhash        (G1.vhash       )
 625#define fdhash       ahash
 626//^^^^^^^^^^^^^^^^^^ ahash is cleared after every function parsing,
 627// and ends up empty after parsing phase. Thus, we can simply reuse it
 628// for fdhash in execution stage.
 629#define g_progname   (G1.g_progname  )
 630#define g_lineno     (G1.g_lineno    )
 631#define nfields      (G1.nfields     )
 632#define maxfields    (G1.maxfields   )
 633#define Fields       (G1.Fields      )
 634#define g_pos        (G1.g_pos       )
 635#define g_saved_ch   (G1.g_saved_ch  )
 636#define icase        (G1.icase       )
 637#define exiting      (G1.exiting     )
 638#define nextrec      (G1.nextrec     )
 639#define nextfile     (G1.nextfile    )
 640#define is_f0_split  (G1.is_f0_split )
 641#define t_rollback   (G1.t_rollback  )
 642#define t_info       (G.t_info      )
 643#define t_tclass     (G.t_tclass    )
 644#define t_string     (G.t_string    )
 645#define t_lineno     (G.t_lineno    )
 646#define intvar       (G.intvar      )
 647#define fsplitter    (G.fsplitter   )
 648#define rsplitter    (G.rsplitter   )
 649#define g_buf        (G.g_buf       )
 650#define INIT_G() do { \
 651        SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
 652        t_tclass = TC_NEWLINE; \
 653        G.evaluate__seed = 1; \
 654} while (0)
 655
 656static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
 657static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
 658static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
 659static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
 660static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments";
 661static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
 662static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
 663static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
 664static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
 665static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field";
 666
 667static int awk_exit(void) NORETURN;
 668
 669static void syntax_error(const char *message) NORETURN;
 670static void syntax_error(const char *message)
 671{
 672        bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
 673}
 674
 675/* ---- hash stuff ---- */
 676
 /* Hash a NUL-terminated string: for each character, h = ch + h*64 - h
  * in unsigned (wrapping) arithmetic. The empty string hashes to 0.
  * The caller reduces the result modulo the table size.
  */
 static unsigned hashidx(const char *name)
 {
         unsigned h = 0;

         for (; *name != '\0'; name++)
                 h = *name + (h << 6) - h;

         return h;
 }
 685
 686/* create new hash */
 687static xhash *hash_init(void)
 688{
 689        xhash *newhash;
 690
 691        newhash = xzalloc(sizeof(*newhash));
 692        newhash->csize = FIRST_PRIME;
 693        newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
 694
 695        return newhash;
 696}
 697
 698static void hash_clear(xhash *hash)
 699{
 700        unsigned i;
 701        hash_item *hi, *thi;
 702
 703        for (i = 0; i < hash->csize; i++) {
 704                hi = hash->items[i];
 705                while (hi) {
 706                        thi = hi;
 707                        hi = hi->next;
 708//FIXME: this assumes that it's a hash of *variables*:
 709                        free(thi->data.v.string);
 710                        free(thi);
 711                }
 712                hash->items[i] = NULL;
 713        }
 714        hash->glen = hash->nel = 0;
 715}
 716
 717#if 0 //UNUSED
 718static void hash_free(xhash *hash)
 719{
 720        hash_clear(hash);
 721        free(hash->items);
 722        free(hash);
 723}
 724#endif
 725
 726/* find item in hash, return ptr to data, NULL if not found */
 727static NOINLINE void *hash_search3(xhash *hash, const char *name, unsigned idx)
 728{
 729        hash_item *hi;
 730
 731        hi = hash->items[idx % hash->csize];
 732        while (hi) {
 733                if (strcmp(hi->name, name) == 0)
 734                        return &hi->data;
 735                hi = hi->next;
 736        }
 737        return NULL;
 738}
 739
 740static void *hash_search(xhash *hash, const char *name)
 741{
 742        return hash_search3(hash, name, hashidx(name));
 743}
 744
 745/* grow hash if it becomes too big */
 746static void hash_rebuild(xhash *hash)
 747{
 748        unsigned newsize, i, idx;
 749        hash_item **newitems, *hi, *thi;
 750
 751        if (hash->nprime == ARRAY_SIZE(PRIMES))
 752                return;
 753
 754        newsize = PRIMES[hash->nprime++];
 755        newitems = xzalloc(newsize * sizeof(newitems[0]));
 756
 757        for (i = 0; i < hash->csize; i++) {
 758                hi = hash->items[i];
 759                while (hi) {
 760                        thi = hi;
 761                        hi = thi->next;
 762                        idx = hashidx(thi->name) % newsize;
 763                        thi->next = newitems[idx];
 764                        newitems[idx] = thi;
 765                }
 766        }
 767
 768        free(hash->items);
 769        hash->csize = newsize;
 770        hash->items = newitems;
 771}
 772
 773/* find item in hash, add it if necessary. Return ptr to data */
 774static void *hash_find(xhash *hash, const char *name)
 775{
 776        hash_item *hi;
 777        unsigned idx;
 778        int l;
 779
 780        idx = hashidx(name);
 781        hi = hash_search3(hash, name, idx);
 782        if (!hi) {
 783                if (++hash->nel > hash->csize * 8)
 784                        hash_rebuild(hash);
 785
 786                l = strlen(name) + 1;
 787                hi = xzalloc(sizeof(*hi) + l);
 788                strcpy(hi->name, name);
 789
 790                idx = idx % hash->csize;
 791                hi->next = hash->items[idx];
 792                hash->items[idx] = hi;
 793                hash->glen += l;
 794        }
 795        return &hi->data;
 796}
 797
 798#define findvar(hash, name) ((var*)    hash_find((hash), (name)))
 799#define newvar(name)        ((var*)    hash_find(vhash, (name)))
 800#define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
 801#define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
 802
 803static void hash_remove(xhash *hash, const char *name)
 804{
 805        hash_item *hi, **phi;
 806
 807        phi = &hash->items[hashidx(name) % hash->csize];
 808        while (*phi) {
 809                hi = *phi;
 810                if (strcmp(hi->name, name) == 0) {
 811                        hash->glen -= (strlen(name) + 1);
 812                        hash->nel--;
 813                        *phi = hi->next;
 814                        free(hi);
 815                        break;
 816                }
 817                phi = &hi->next;
 818        }
 819}
 820
 821/* ------ some useful functions ------ */
 822
 823static char *skip_spaces(char *p)
 824{
 825        for (;;) {
 826                if (*p == '\\' && p[1] == '\n') {
 827                        p++;
 828                        t_lineno++;
 829                } else if (*p != ' ' && *p != '\t') {
 830                        break;
 831                }
 832                p++;
 833        }
 834        return p;
 835}
 836
 837/* returns old *s, advances *s past word and terminating NUL */
 838static char *nextword(char **s)
 839{
 840        char *p = *s;
 841        char *q = p;
 842        while (*q++ != '\0')
 843                continue;
 844        *s = q;
 845        return p;
 846}
 847
 848static char nextchar(char **s)
 849{
 850        char c, *pps;
 851 again:
 852        c = *(*s)++;
 853        pps = *s;
 854        if (c == '\\')
 855                c = bb_process_escape_sequence((const char**)s);
 856        /* Example awk statement:
 857         * s = "abc\"def"
 858         * we must treat \" as "
 859         */
 860        if (c == '\\' && *s == pps) { /* unrecognized \z? */
 861                c = *(*s); /* yes, fetch z */
 862                if (c) { /* advance unless z = NUL */
 863                        (*s)++;
 864                        if (c == '\n') /* \<newline>? eat it */
 865                                goto again;
 866                }
 867        }
 868        return c;
 869}
 870
 871/* TODO: merge with strcpy_and_process_escape_sequences()?
 872 */
 873static void unescape_string_in_place(char *s1)
 874{
 875        char *s = s1;
 876        while ((*s1 = nextchar(&s)) != '\0')
 877                s1++;
 878}
 879
 880static ALWAYS_INLINE int isalnum_(int c)
 881{
 882        return (isalnum(c) || c == '_');
 883}
 884
 885static double my_strtod(char **pp)
 886{
 887        char *cp = *pp;
 888        return strtod(cp, pp);
 889}
 890#if ENABLE_DESKTOP
 891static double my_strtod_or_hexoct(char **pp)
 892{
 893        char *cp = *pp;
 894        if (cp[0] == '0') {
 895                /* Might be hex or octal integer: 0x123abc or 07777 */
 896                char c = (cp[1] | 0x20);
 897                if (c == 'x' || isdigit(cp[1])) {
 898                        unsigned long long ull = strtoull(cp, pp, 0);
 899                        if (c == 'x')
 900                                return ull;
 901                        c = **pp;
 902                        if (!isdigit(c) && c != '.')
 903                                return ull;
 904                        /* else: it may be a floating number. Examples:
 905                         * 009.123 (*pp points to '9')
 906                         * 000.123 (*pp points to '.')
 907                         * fall through to strtod.
 908                         */
 909                }
 910        }
 911        return strtod(cp, pp);
 912}
 913#else
 914# define my_strtod_or_hexoct(p) my_strtod(p)
 915#endif
 916
 917/* -------- working with variables (set/get/copy/etc) -------- */
 918
 919static void fmt_num(const char *format, double n)
 920{
 921        if (n == (long long)n) {
 922                snprintf(g_buf, MAXVARFMT, "%lld", (long long)n);
 923        } else {
 924                const char *s = format;
 925                char c;
 926
 927                do { c = *s; } while (c && *++s);
 928                if (strchr("diouxX", c)) {
 929                        snprintf(g_buf, MAXVARFMT, format, (int)n);
 930                } else if (strchr("eEfFgGaA", c)) {
 931                        snprintf(g_buf, MAXVARFMT, format, n);
 932                } else {
 933                        syntax_error(EMSG_INV_FMT);
 934                }
 935        }
 936}
 937
 938static xhash *iamarray(var *a)
 939{
 940        while (a->type & VF_CHILD)
 941                a = a->x.parent;
 942
 943        if (!(a->type & VF_ARRAY)) {
 944                a->type |= VF_ARRAY;
 945                a->x.array = hash_init();
 946        }
 947        return a->x.array;
 948}
 949
 950#define clear_array(array) hash_clear(array)
 951
 952/* clear a variable */
 953static var *clrvar(var *v)
 954{
 955        if (!(v->type & VF_FSTR))
 956                free(v->string);
 957
 958        v->type &= VF_DONTTOUCH;
 959        v->type |= VF_DIRTY;
 960        v->string = NULL;
 961        return v;
 962}
 963
 964static void handle_special(var *);
 965
 966/* assign string value to variable */
 967static var *setvar_p(var *v, char *value)
 968{
 969        clrvar(v);
 970        v->string = value;
 971        handle_special(v);
 972        return v;
 973}
 974
 975/* same as setvar_p but make a copy of string */
 976static var *setvar_s(var *v, const char *value)
 977{
 978        return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
 979}
 980
 981/* same as setvar_s but sets USER flag */
 982static var *setvar_u(var *v, const char *value)
 983{
 984        v = setvar_s(v, value);
 985        v->type |= VF_USER;
 986        return v;
 987}
 988
 989/* set array element to user string */
 990static void setari_u(var *a, int idx, const char *s)
 991{
 992        var *v;
 993
 994        v = findvar(iamarray(a), itoa(idx));
 995        setvar_u(v, s);
 996}
 997
 998/* assign numeric value to variable */
 999static var *setvar_i(var *v, double value)
1000{
1001        clrvar(v);
1002        v->type |= VF_NUMBER;
1003        v->number = value;
1004        handle_special(v);
1005        return v;
1006}
1007
1008static const char *getvar_s(var *v)
1009{
1010        /* if v is numeric and has no cached string, convert it to string */
1011        if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
1012                fmt_num(getvar_s(intvar[CONVFMT]), v->number);
1013                v->string = xstrdup(g_buf);
1014                v->type |= VF_CACHED;
1015        }
1016        return (v->string == NULL) ? "" : v->string;
1017}
1018
1019static double getvar_i(var *v)
1020{
1021        char *s;
1022
1023        if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
1024                v->number = 0;
1025                s = v->string;
1026                if (s && *s) {
1027                        debug_printf_eval("getvar_i: '%s'->", s);
1028                        v->number = my_strtod(&s);
1029                        /* ^^^ hex/oct NOT allowed here! */
1030                        debug_printf_eval("%f (s:'%s')\n", v->number, s);
1031                        if (v->type & VF_USER) {
1032//TODO: skip_spaces() also skips backslash+newline, is it intended here?
1033                                s = skip_spaces(s);
1034                                if (*s != '\0')
1035                                        v->type &= ~VF_USER;
1036                        }
1037                } else {
1038                        debug_printf_eval("getvar_i: '%s'->zero\n", s);
1039                        v->type &= ~VF_USER;
1040                }
1041                v->type |= VF_CACHED;
1042        }
1043        debug_printf_eval("getvar_i: %f\n", v->number);
1044        return v->number;
1045}
1046
1047/* Used for operands of bitwise ops */
1048static unsigned long getvar_i_int(var *v)
1049{
1050        double d = getvar_i(v);
1051
1052        /* Casting doubles to longs is undefined for values outside
1053         * of target type range. Try to widen it as much as possible */
1054        if (d >= 0)
1055                return (unsigned long)d;
1056        /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
1057        return - (long) (unsigned long) (-d);
1058}
1059
1060static var *copyvar(var *dest, const var *src)
1061{
1062        if (dest != src) {
1063                clrvar(dest);
1064                dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
1065                debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
1066                dest->number = src->number;
1067                if (src->string)
1068                        dest->string = xstrdup(src->string);
1069        }
1070        handle_special(dest);
1071        return dest;
1072}
1073
1074static var *incvar(var *v)
1075{
1076        return setvar_i(v, getvar_i(v) + 1.0);
1077}
1078
1079/* return true if v is number or numeric string */
1080static int is_numeric(var *v)
1081{
1082        getvar_i(v);
1083        return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
1084}
1085
1086/* return 1 when value of v corresponds to true, 0 otherwise */
1087static int istrue(var *v)
1088{
1089        if (is_numeric(v))
1090                return (v->number != 0);
1091        return (v->string && v->string[0]);
1092}
1093
1094/* ------- awk program text parsing ------- */
1095
1096/* Parse next token pointed by global pos, place results into global t_XYZ variables.
1097 * If token isn't expected, print error message and die.
1098 * Return token class (also store it in t_tclass).
1099 */
1100static uint32_t next_token(uint32_t expected)
1101{
1102#define concat_inserted (G1.next_token__concat_inserted)
1103#define save_tclass     (G1.next_token__save_tclass)
1104#define save_info       (G1.next_token__save_info)
1105
1106        char *p;
1107        const char *tl;
1108        const uint32_t *ti;
1109        uint32_t tc, last_token_class;
1110
1111        last_token_class = t_tclass; /* t_tclass is initialized to TC_NEWLINE */
1112
1113        debug_printf_parse("%s() expected(%x):", __func__, expected);
1114        debug_parse_print_tc(expected);
1115        debug_printf_parse("\n");
1116
1117        if (t_rollback) {
1118                debug_printf_parse("%s: using rolled-back token\n", __func__);
1119                t_rollback = FALSE;
1120        } else if (concat_inserted) {
1121                debug_printf_parse("%s: using concat-inserted token\n", __func__);
1122                concat_inserted = FALSE;
1123                t_tclass = save_tclass;
1124                t_info = save_info;
1125        } else {
1126                p = g_pos;
1127                if (g_saved_ch != '\0') {
1128                        *p = g_saved_ch;
1129                        g_saved_ch = '\0';
1130                }
1131 readnext:
1132                p = skip_spaces(p);
1133                g_lineno = t_lineno;
1134                if (*p == '#')
1135                        while (*p != '\n' && *p != '\0')
1136                                p++;
1137
1138                if (*p == '\0') {
1139                        tc = TC_EOF;
1140                        debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1141                } else if (*p == '"') {
1142                        /* it's a string */
1143                        char *s = t_string = ++p;
1144                        while (*p != '"') {
1145                                char *pp;
1146                                if (*p == '\0' || *p == '\n')
1147                                        syntax_error(EMSG_UNEXP_EOS);
1148                                pp = p;
1149                                *s++ = nextchar(&pp);
1150                                p = pp;
1151                        }
1152                        p++;
1153                        *s = '\0';
1154                        tc = TC_STRING;
1155                        debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1156                } else if ((expected & TC_REGEXP) && *p == '/') {
1157                        /* it's regexp */
1158                        char *s = t_string = ++p;
1159                        while (*p != '/') {
1160                                if (*p == '\0' || *p == '\n')
1161                                        syntax_error(EMSG_UNEXP_EOS);
1162                                *s = *p++;
1163                                if (*s++ == '\\') {
1164                                        char *pp = p;
1165                                        s[-1] = bb_process_escape_sequence((const char **)&pp);
1166                                        if (*p == '\\')
1167                                                *s++ = '\\';
1168                                        if (pp == p)
1169                                                *s++ = *p++;
1170                                        else
1171                                                p = pp;
1172                                }
1173                        }
1174                        p++;
1175                        *s = '\0';
1176                        tc = TC_REGEXP;
1177                        debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1178
1179                } else if (*p == '.' || isdigit(*p)) {
1180                        /* it's a number */
1181                        char *pp = p;
1182                        t_double = my_strtod_or_hexoct(&pp);
1183                        /* ^^^ awk only allows hex/oct consts in _program_, not in _input_ */
1184                        p = pp;
1185                        if (*p == '.')
1186                                syntax_error(EMSG_UNEXP_TOKEN);
1187                        tc = TC_NUMBER;
1188                        debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1189                } else {
1190                        char *end_of_name;
1191
1192                        if (*p == '\n')
1193                                t_lineno++;
1194
1195                        /* search for something known */
1196                        tl = tokenlist;
1197                        tc = 0x00000001;
1198                        ti = tokeninfo;
1199                        while (*tl) {
1200                                int l = (unsigned char) *tl++;
1201                                if (l == (unsigned char) NTCC) {
1202                                        tc <<= 1;
1203                                        continue;
1204                                }
1205                                /* if token class is expected,
1206                                 * token matches,
1207                                 * and it's not a longer word,
1208                                 */
1209                                if ((tc & (expected | TS_WORD | TC_NEWLINE))
1210                                 && strncmp(p, tl, l) == 0
1211                                 && !((tc & TS_WORD) && isalnum_(p[l]))
1212                                ) {
1213                                        /* then this is what we are looking for */
1214                                        t_info = *ti;
1215                                        debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1216                                        p += l;
1217                                        goto token_found;
1218                                }
1219                                ti++;
1220                                tl += l;
1221                        }
1222                        /* not a known token */
1223
1224                        /* is it a name? (var/array/function) */
1225                        if (!isalnum_(*p))
1226                                syntax_error(EMSG_UNEXP_TOKEN); /* no */
1227                        /* yes */
1228                        t_string = p;
1229                        while (isalnum_(*p))
1230                                p++;
1231                        end_of_name = p;
1232
1233                        if (last_token_class == TC_FUNCDECL)
1234                                /* eat space in "function FUNC (...) {...}" declaration */
1235                                p = skip_spaces(p);
1236                        else if (expected & TC_ARRAY) {
1237                                /* eat space between array name and [ */
1238                                char *s = skip_spaces(p);
1239                                if (*s == '[') /* array ref, not just a name? */
1240                                        p = s;
1241                        }
1242                        /* else: do NOT consume whitespace after variable name!
1243                         * gawk allows definition "function FUNC (p) {...}" - note space,
1244                         * but disallows the call "FUNC (p)" because it isn't one -
1245                         * expression "v (a)" should NOT be parsed as TC_FUNCTION:
1246                         * it is a valid concatenation if "v" is a variable,
1247                         * not a function name (and type of name is not known at parse time).
1248                         */
1249
1250                        if (*p == '(') {
1251                                p++;
1252                                tc = TC_FUNCTION;
1253                                debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1254                        } else if (*p == '[') {
1255                                p++;
1256                                tc = TC_ARRAY;
1257                                debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1258                        } else {
1259                                tc = TC_VARIABLE;
1260                                debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1261                                if (end_of_name == p) {
1262                                        /* there is no space for trailing NUL in t_string!
1263                                         * We need to save the char we are going to NUL.
1264                                         * (we'll use it in future call to next_token())
1265                                         */
1266                                        g_saved_ch = *end_of_name;
1267// especially pathological example is V="abc"; V.2 - it's V concatenated to .2
1268// (it evaluates to "abc0.2"). Because of this case, we can't simply cache
1269// '.' and analyze it later: we also have to *store it back* in next
1270// next_token(), in order to give my_strtod() the undamaged ".2" string.
1271                                }
1272                        }
1273                        *end_of_name = '\0'; /* terminate t_string */
1274                }
1275 token_found:
1276                g_pos = p;
1277
1278                /* skipping newlines in some cases */
1279                if ((last_token_class & TS_NOTERM) && (tc & TC_NEWLINE))
1280                        goto readnext;
1281
1282                /* insert concatenation operator when needed */
1283                debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__,
1284                        (last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP),
1285                        !(last_token_class == TC_LENGTH && tc == TC_LPAREN));
1286                if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP)
1287                 && !(last_token_class == TC_LENGTH && tc == TC_LPAREN) /* but not for "length(..." */
1288                ) {
1289                        concat_inserted = TRUE;
1290                        save_tclass = tc;
1291                        save_info = t_info;
1292                        tc = TC_BINOPX;
1293                        t_info = OC_CONCAT | SS | P(35);
1294                }
1295
1296                t_tclass = tc;
1297                debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, tc);
1298        }
1299        /* Are we ready for this? */
1300        if (!(t_tclass & expected)) {
1301                syntax_error((last_token_class & (TC_NEWLINE | TC_EOF)) ?
1302                                EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1303        }
1304
1305        debug_printf_parse("%s: returning, t_double:%f t_tclass:", __func__, t_double);
1306        debug_parse_print_tc(t_tclass);
1307        debug_printf_parse("\n");
1308
1309        return t_tclass;
1310#undef concat_inserted
1311#undef save_tclass
1312#undef save_info
1313}
1314
/* Push the current token back: with t_rollback set, the next call
 * to next_token() does not scan new text, it reuses the current token.
 */
static ALWAYS_INLINE void rollback_token(void)
{
        t_rollback = TRUE;
}
1319
1320static node *new_node(uint32_t info)
1321{
1322        node *n;
1323
1324        n = xzalloc(sizeof(node));
1325        n->info = info;
1326        n->lineno = g_lineno;
1327        return n;
1328}
1329
1330static void mk_re_node(const char *s, node *n, regex_t *re)
1331{
1332        n->info = TI_REGEXP;
1333        n->l.re = re;
1334        n->r.ire = re + 1;
1335        xregcomp(re, s, REG_EXTENDED);
1336        xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1337}
1338
1339static node *parse_expr(uint32_t);
1340
1341static node *parse_lrparen_list(void)
1342{
1343        next_token(TC_LPAREN);
1344        return parse_expr(TC_RPAREN);
1345}
1346
1347/* parse expression terminated by given argument, return ptr
1348 * to built subtree. Terminator is eaten by parse_expr */
1349static node *parse_expr(uint32_t term_tc)
1350{
1351        node sn;
1352        node *cn = &sn;
1353        node *vn, *glptr;
1354        uint32_t tc, expected_tc;
1355        var *v;
1356
1357        debug_printf_parse("%s() term_tc(%x):", __func__, term_tc);
1358        debug_parse_print_tc(term_tc);
1359        debug_printf_parse("\n");
1360
1361        sn.info = PRIMASK;
1362        sn.r.n = sn.a.n = glptr = NULL;
1363        expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc;
1364
1365        while (!((tc = next_token(expected_tc)) & term_tc)) {
1366
1367                if (glptr && (t_info == TI_LESS)) {
1368                        /* input redirection (<) attached to glptr node */
1369                        debug_printf_parse("%s: input redir\n", __func__);
1370                        cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1371                        cn->a.n = glptr;
1372                        expected_tc = TS_OPERAND | TS_UOPPRE;
1373                        glptr = NULL;
1374                        continue;
1375                }
1376                if (tc & (TS_BINOP | TC_UOPPOST)) {
1377                        debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
1378                        /* for binary and postfix-unary operators, jump back over
1379                         * previous operators with higher priority */
1380                        vn = cn;
1381                        while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1382                            || ((t_info == vn->info) && t_info == TI_COLON)
1383                        ) {
1384                                vn = vn->a.n;
1385                                if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
1386                        }
1387                        if (t_info == TI_TERNARY)
1388//TODO: why?
1389                                t_info += P(6);
1390                        cn = vn->a.n->r.n = new_node(t_info);
1391                        cn->a.n = vn->a.n;
1392                        if (tc & TS_BINOP) {
1393                                cn->l.n = vn;
1394//FIXME: this is the place to detect and reject assignments to non-lvalues.
1395//Currently we allow "assignments" to consts and temporaries, nonsense like this:
1396// awk 'BEGIN { "qwe" = 1 }'
1397// awk 'BEGIN { 7 *= 7 }'
1398// awk 'BEGIN { length("qwe") = 1 }'
1399// awk 'BEGIN { (1+1) += 3 }'
1400                                expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
1401                                if (t_info == TI_PGETLINE) {
1402                                        /* it's a pipe */
1403                                        next_token(TC_GETLINE);
1404                                        /* give maximum priority to this pipe */
1405                                        cn->info &= ~PRIMASK;
1406                                        expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
1407                                }
1408                        } else {
1409                                cn->r.n = vn;
1410                                expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
1411                        }
1412                        vn->a.n = cn;
1413                        continue;
1414                }
1415
1416                debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info);
1417                /* for operands and prefix-unary operators, attach them
1418                 * to last node */
1419                vn = cn;
1420                cn = vn->r.n = new_node(t_info);
1421                cn->a.n = vn;
1422
1423                expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
1424                if (t_info == TI_PREINC || t_info == TI_PREDEC)
1425                        expected_tc = TS_LVALUE | TC_UOPPRE1;
1426
1427                if (!(tc & (TS_OPERAND | TC_REGEXP)))
1428                        continue;
1429
1430                debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__);
1431                expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc;
1432                /* one should be very careful with switch on tclass -
1433                 * only simple tclasses should be used (TC_xyz, not TS_xyz) */
1434                switch (tc) {
1435                case TC_VARIABLE:
1436                case TC_ARRAY:
1437                        debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1438                        cn->info = OC_VAR;
1439                        v = hash_search(ahash, t_string);
1440                        if (v != NULL) {
1441                                cn->info = OC_FNARG;
1442                                cn->l.aidx = v->x.aidx;
1443                        } else {
1444                                cn->l.v = newvar(t_string);
1445                        }
1446                        if (tc & TC_ARRAY) {
1447                                cn->info |= xS;
1448                                cn->r.n = parse_expr(TC_ARRTERM);
1449                        }
1450                        break;
1451
1452                case TC_NUMBER:
1453                case TC_STRING:
1454                        debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
1455                        cn->info = OC_VAR;
1456                        v = cn->l.v = xzalloc(sizeof(var));
1457                        if (tc & TC_NUMBER)
1458                                setvar_i(v, t_double);
1459                        else {
1460                                setvar_s(v, t_string);
1461                                expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */
1462                        }
1463                        break;
1464
1465                case TC_REGEXP:
1466                        debug_printf_parse("%s: TC_REGEXP\n", __func__);
1467                        mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1468                        break;
1469
1470                case TC_FUNCTION:
1471                        debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1472                        cn->info = OC_FUNC;
1473                        cn->r.f = newfunc(t_string);
1474                        cn->l.n = parse_expr(TC_RPAREN);
1475                        break;
1476
1477                case TC_LPAREN:
1478                        debug_printf_parse("%s: TC_LPAREN\n", __func__);
1479                        cn = vn->r.n = parse_expr(TC_RPAREN);
1480                        if (!cn)
1481                                syntax_error("Empty sequence");
1482                        cn->a.n = vn;
1483                        break;
1484
1485                case TC_GETLINE:
1486                        debug_printf_parse("%s: TC_GETLINE\n", __func__);
1487                        glptr = cn;
1488                        expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
1489                        break;
1490
1491                case TC_BUILTIN:
1492                        debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1493                        cn->l.n = parse_lrparen_list();
1494                        break;
1495
1496                case TC_LENGTH:
1497                        debug_printf_parse("%s: TC_LENGTH\n", __func__);
1498                        tc = next_token(TC_LPAREN /* length(...) */
1499                                | TC_SEMICOL   /* length; */
1500                                | TC_NEWLINE   /* length<newline> */
1501                                | TC_RBRACE    /* length } */
1502                                | TC_BINOPX    /* length <op> NUM */
1503                                | TC_COMMA     /* print length, 1 */
1504                        );
1505                        if (tc != TC_LPAREN)
1506                                rollback_token();
1507                        else {
1508                                /* It was a "(" token. Handle just like TC_BUILTIN */
1509                                cn->l.n = parse_expr(TC_RPAREN);
1510                        }
1511                        break;
1512                }
1513        } /* while() */
1514
1515        debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1516        return sn.r.n;
1517}
1518
1519/* add node to chain. Return ptr to alloc'd node */
1520static node *chain_node(uint32_t info)
1521{
1522        node *n;
1523
1524        if (!seq->first)
1525                seq->first = seq->last = new_node(0);
1526
1527        if (seq->programname != g_progname) {
1528                seq->programname = g_progname;
1529                n = chain_node(OC_NEWSOURCE);
1530                n->l.new_progname = g_progname;
1531        }
1532
1533        n = seq->last;
1534        n->info = info;
1535        seq->last = n->a.n = new_node(OC_DONE);
1536
1537        return n;
1538}
1539
1540static void chain_expr(uint32_t info)
1541{
1542        node *n;
1543
1544        n = chain_node(info);
1545
1546        n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
1547        if ((info & OF_REQUIRED) && !n->l.n)
1548                syntax_error(EMSG_TOO_FEW_ARGS);
1549
1550        if (t_tclass & TC_RBRACE)
1551                rollback_token();
1552}
1553
1554static void chain_group(void);
1555
1556static node *chain_loop(node *nn)
1557{
1558        node *n, *n2, *save_brk, *save_cont;
1559
1560        save_brk = break_ptr;
1561        save_cont = continue_ptr;
1562
1563        n = chain_node(OC_BR | Vx);
1564        continue_ptr = new_node(OC_EXEC);
1565        break_ptr = new_node(OC_EXEC);
1566        chain_group();
1567        n2 = chain_node(OC_EXEC | Vx);
1568        n2->l.n = nn;
1569        n2->a.n = n;
1570        continue_ptr->a.n = n2;
1571        break_ptr->a.n = n->r.n = seq->last;
1572
1573        continue_ptr = save_cont;
1574        break_ptr = save_brk;
1575
1576        return n;
1577}
1578
1579static void chain_until_rbrace(void)
1580{
1581        uint32_t tc;
1582        while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
1583                debug_printf_parse("%s: !TC_RBRACE\n", __func__);
1584                if (tc == TC_NEWLINE)
1585                        continue;
1586                rollback_token();
1587                chain_group();
1588        }
1589        debug_printf_parse("%s: TC_RBRACE\n", __func__);
1590}
1591
1592/* parse group and attach it to chain */
1593static void chain_group(void)
1594{
1595        uint32_t tc;
1596        node *n, *n2, *n3;
1597
1598        do {
1599                tc = next_token(TS_GRPSEQ);
1600        } while (tc == TC_NEWLINE);
1601
1602        if (tc == TC_LBRACE) {
1603                debug_printf_parse("%s: TC_LBRACE\n", __func__);
1604                chain_until_rbrace();
1605                return;
1606        }
1607        if (tc & (TS_OPSEQ | TC_SEMICOL)) {
1608                debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL\n", __func__);
1609                rollback_token();
1610                chain_expr(OC_EXEC | Vx);
1611                return;
1612        }
1613
1614        /* TS_STATEMNT */
1615        debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__);
1616        switch (t_info & OPCLSMASK) {
1617        case ST_IF:
1618                debug_printf_parse("%s: ST_IF\n", __func__);
1619                n = chain_node(OC_BR | Vx);
1620                n->l.n = parse_lrparen_list();
1621                chain_group();
1622                n2 = chain_node(OC_EXEC);
1623                n->r.n = seq->last;
1624                if (next_token(TS_GRPSEQ | TC_RBRACE | TC_ELSE) == TC_ELSE) {
1625                        chain_group();
1626                        n2->a.n = seq->last;
1627                } else {
1628                        rollback_token();
1629                }
1630                break;
1631
1632        case ST_WHILE:
1633                debug_printf_parse("%s: ST_WHILE\n", __func__);
1634                n2 = parse_lrparen_list();
1635                n = chain_loop(NULL);
1636                n->l.n = n2;
1637                break;
1638
1639        case ST_DO:
1640                debug_printf_parse("%s: ST_DO\n", __func__);
1641                n2 = chain_node(OC_EXEC);
1642                n = chain_loop(NULL);
1643                n2->a.n = n->a.n;
1644                next_token(TC_WHILE);
1645                n->l.n = parse_lrparen_list();
1646                break;
1647
1648        case ST_FOR:
1649                debug_printf_parse("%s: ST_FOR\n", __func__);
1650                next_token(TC_LPAREN);
1651                n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
1652                if (t_tclass & TC_RPAREN) {     /* for (I in ARRAY) */
1653                        if (!n2 || n2->info != TI_IN)
1654                                syntax_error(EMSG_UNEXP_TOKEN);
1655                        n = chain_node(OC_WALKINIT | VV);
1656                        n->l.n = n2->l.n;
1657                        n->r.n = n2->r.n;
1658                        n = chain_loop(NULL);
1659                        n->info = OC_WALKNEXT | Vx;
1660                        n->l.n = n2->l.n;
1661                } else {                        /* for (;;) */
1662                        n = chain_node(OC_EXEC | Vx);
1663                        n->l.n = n2;
1664                        n2 = parse_expr(TC_SEMICOL);
1665                        n3 = parse_expr(TC_RPAREN);
1666                        n = chain_loop(n3);
1667                        n->l.n = n2;
1668                        if (!n2)
1669                                n->info = OC_EXEC;
1670                }
1671                break;
1672
1673        case OC_PRINT:
1674        case OC_PRINTF:
1675                debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1676                n = chain_node(t_info);
1677                n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_OUTRDR | TC_RBRACE);
1678                if (t_tclass & TC_OUTRDR) {
1679                        n->info |= t_info;
1680                        n->r.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
1681                }
1682                if (t_tclass & TC_RBRACE)
1683                        rollback_token();
1684                break;
1685
1686        case OC_BREAK:
1687                debug_printf_parse("%s: OC_BREAK\n", __func__);
1688                n = chain_node(OC_EXEC);
1689                if (!break_ptr)
1690                        syntax_error("'break' not in a loop");
1691                n->a.n = break_ptr;
1692                chain_expr(t_info);
1693                break;
1694
1695        case OC_CONTINUE:
1696                debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1697                n = chain_node(OC_EXEC);
1698                if (!continue_ptr)
1699                        syntax_error("'continue' not in a loop");
1700                n->a.n = continue_ptr;
1701                chain_expr(t_info);
1702                break;
1703
1704        /* delete, next, nextfile, return, exit */
1705        default:
1706                debug_printf_parse("%s: default\n", __func__);
1707                chain_expr(t_info);
1708        }
1709}
1710
1711static void parse_program(char *p)
1712{
1713        debug_printf_parse("%s()\n", __func__);
1714
1715        g_pos = p;
1716        t_lineno = 1;
1717        for (;;) {
1718                uint32_t tclass;
1719
1720                tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
1721                        | TC_EOF | TC_NEWLINE /* but not TC_SEMICOL */);
1722 got_tok:
1723                if (tclass == TC_EOF) {
1724                        debug_printf_parse("%s: TC_EOF\n", __func__);
1725                        break;
1726                }
1727                if (tclass == TC_NEWLINE) {
1728                        debug_printf_parse("%s: TC_NEWLINE\n", __func__);
1729                        continue;
1730                }
1731                if (tclass == TC_BEGIN) {
1732                        debug_printf_parse("%s: TC_BEGIN\n", __func__);
1733                        seq = &beginseq;
1734                        /* ensure there is no newline between BEGIN and { */
1735                        next_token(TC_LBRACE);
1736                        chain_until_rbrace();
1737                        goto next_tok;
1738                }
1739                if (tclass == TC_END) {
1740                        debug_printf_parse("%s: TC_END\n", __func__);
1741                        seq = &endseq;
1742                        /* ensure there is no newline between END and { */
1743                        next_token(TC_LBRACE);
1744                        chain_until_rbrace();
1745                        goto next_tok;
1746                }
1747                if (tclass == TC_FUNCDECL) {
1748                        func *f;
1749
1750                        debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1751                        next_token(TC_FUNCTION);
1752                        f = newfunc(t_string);
1753                        if (f->defined)
1754                                syntax_error("Duplicate function");
1755                        f->defined = 1;
1756                        //f->body.first = NULL; - already is
1757                        //f->nargs = 0; - already is
1758                        /* func arg list: comma sep list of args, and a close paren */
1759                        for (;;) {
1760                                var *v;
1761                                if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
1762                                        if (f->nargs == 0)
1763                                                break; /* func() is ok */
1764                                        /* func(a,) is not ok */
1765                                        syntax_error(EMSG_UNEXP_TOKEN);
1766                                }
1767                                v = findvar(ahash, t_string);
1768                                v->x.aidx = f->nargs++;
1769                                /* Arg followed either by end of arg list or 1 comma */
1770                                if (next_token(TC_COMMA | TC_RPAREN) == TC_RPAREN)
1771                                        break;
1772                                /* it was a comma, we ate it */
1773                        }
1774                        seq = &f->body;
1775                        /* ensure there is { after "func F(...)" - but newlines are allowed */
1776                        while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE)
1777                                continue;
1778                        chain_until_rbrace();
1779                        hash_clear(ahash);
1780                        goto next_tok;
1781                }
1782                seq = &mainseq;
1783                if (tclass & TS_OPSEQ) {
1784                        node *cn;
1785
1786                        debug_printf_parse("%s: TS_OPSEQ\n", __func__);
1787                        rollback_token();
1788                        cn = chain_node(OC_TEST);
1789                        cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE);
1790                        if (t_tclass == TC_LBRACE) {
1791                                debug_printf_parse("%s: TC_LBRACE\n", __func__);
1792                                chain_until_rbrace();
1793                        } else {
1794                                /* no action, assume default "{ print }" */
1795                                debug_printf_parse("%s: !TC_LBRACE\n", __func__);
1796                                chain_node(OC_PRINT);
1797                        }
1798                        cn->r.n = mainseq.last;
1799                        goto next_tok;
1800                }
1801                /* tclass == TC_LBRACE */
1802                debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
1803                chain_until_rbrace();
1804 next_tok:
1805                /* Same as next_token() at the top of the loop, + TC_SEMICOL */
1806                tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
1807                        | TC_EOF | TC_NEWLINE | TC_SEMICOL);
1808                /* gawk allows many newlines, but does not allow more than one semicolon:
1809                 *  BEGIN {...}<newline>;<newline>;
1810                 * would complain "each rule must have a pattern or an action part".
1811                 * Same message for
1812                 *  ; BEGIN {...}
1813                 */
1814                if (tclass != TC_SEMICOL)
1815                        goto got_tok; /* use this token */
1816                /* else: loop back - ate the semicolon, get and use _next_ token */
1817        } /* for (;;) */
1818}
1819
1820/* -------- program execution part -------- */
1821
1822/* temporary variables allocator */
1823static var *nvalloc(int sz)
1824{
1825        return xzalloc(sz * sizeof(var));
1826}
1827
1828static void nvfree(var *v, int sz)
1829{
1830        var *p = v;
1831
1832        while (--sz >= 0) {
1833                if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1834                        clear_array(iamarray(p));
1835                        free(p->x.array->items);
1836                        free(p->x.array);
1837                }
1838                if (p->type & VF_WALK) {
1839                        walker_list *n;
1840                        walker_list *w = p->x.walker;
1841                        debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1842                        p->x.walker = NULL;
1843                        while (w) {
1844                                n = w->prev;
1845                                debug_printf_walker(" free(%p)\n", w);
1846                                free(w);
1847                                w = n;
1848                        }
1849                }
1850                clrvar(p);
1851                p++;
1852        }
1853
1854        free(v);
1855}
1856
1857static node *mk_splitter(const char *s, tsplitter *spl)
1858{
1859        regex_t *re, *ire;
1860        node *n;
1861
1862        re = &spl->re[0];
1863        ire = &spl->re[1];
1864        n = &spl->n;
1865        if (n->info == TI_REGEXP) {
1866                regfree(re);
1867                regfree(ire); // TODO: nuke ire, use re+1?
1868        }
1869        if (s[0] && s[1]) { /* strlen(s) > 1 */
1870                mk_re_node(s, n, re);
1871        } else {
1872                n->info = (uint32_t) s[0];
1873        }
1874
1875        return n;
1876}
1877
1878static var *evaluate(node *, var *);
1879
1880/* Use node as a regular expression. Supplied with node ptr and regex_t
1881 * storage space. Return ptr to regex (if result points to preg, it should
1882 * be later regfree'd manually).
1883 */
1884static regex_t *as_regex(node *op, regex_t *preg)
1885{
1886        int cflags;
1887        const char *s;
1888
1889        if (op->info == TI_REGEXP) {
1890                return icase ? op->r.ire : op->l.re;
1891        }
1892
1893        //tmpvar = nvalloc(1);
1894#define TMPVAR (&G.as_regex__tmpvar)
1895        // We use a single "static" tmpvar (instead of on-stack or malloced one)
1896        // to decrease memory consumption in deeply-recursive awk programs.
1897        // The rule to work safely is to never call evaluate() while our static
1898        // TMPVAR's value is still needed.
1899        s = getvar_s(evaluate(op, TMPVAR));
1900
1901        cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1902        /* Testcase where REG_EXTENDED fails (unpaired '{'):
1903         * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1904         * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1905         * (maybe gsub is not supposed to use REG_EXTENDED?).
1906         */
1907        if (regcomp(preg, s, cflags)) {
1908                cflags &= ~REG_EXTENDED;
1909                xregcomp(preg, s, cflags);
1910        }
1911        //nvfree(tmpvar, 1);
1912#undef TMPVAR
1913        return preg;
1914}
1915
/*
 * Gradually growing buffer.
 * Makes sure buffer 'b' can hold more than 'n' bytes: if 'b' is NULL or
 * n >= *size, it is reallocated to n + n/2 + 80 bytes and *size is updated.
 * Because reallocation happens even when n == *size, there is always at
 * least one byte beyond index n-1.
 * Returns the (possibly moved) buffer.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b != NULL && n < *size)
		return b;

	*size = n + (n >> 1) + 80;
	return xrealloc(b, *size);
}
1928
1929/* resize field storage space */
1930static void fsrealloc(int size)
1931{
1932        int i, newsize;
1933
1934        if (size >= maxfields) {
1935                /* Sanity cap, easier than catering for overflows */
1936                if (size > 0xffffff)
1937                        bb_die_memory_exhausted();
1938
1939                i = maxfields;
1940                maxfields = size + 16;
1941
1942                newsize = maxfields * sizeof(Fields[0]);
1943                debug_printf_eval("fsrealloc: xrealloc(%p, %u)\n", Fields, newsize);
1944                Fields = xrealloc(Fields, newsize);
1945                debug_printf_eval("fsrealloc: Fields=%p..%p\n", Fields, (char*)Fields + newsize - 1);
1946                /* ^^^ did Fields[] move? debug aid for L.v getting "upstaged" by R.v in evaluate() */
1947
1948                for (; i < maxfields; i++) {
1949                        Fields[i].type = VF_SPECIAL;
1950                        Fields[i].string = NULL;
1951                }
1952        }
1953        /* if size < nfields, clear extra field variables */
1954        for (i = size; i < nfields; i++) {
1955                clrvar(Fields + i);
1956        }
1957        nfields = size;
1958}
1959
/*
 * Like regexec(preg, s, 1, pmatch, 0), except that an empty match at the
 * very start of 's' is not accepted: in that case the string is rescanned
 * from each following offset until a match with a non-zero end offset is
 * found.
 *
 * Returns 0 with pmatch[0] holding offsets relative to 's', or a non-zero
 * regexec() result (REG_NOMATCH when only empty matches exist).
 */
static int regexec1_nonempty(const regex_t *preg, const char *s, regmatch_t pmatch[])
{
	size_t ofs;
	int r = regexec(preg, s, 1, pmatch, 0);

	if (r != 0 || pmatch[0].rm_eo != 0)
		return r;

	/* For example, happens when FS can match
	 * an empty string (awk -F ' *'). Logically,
	 * this should split into one-char fields.
	 * However, gawk 5.0.1 searches for first
	 * _non-empty_ separator string match:
	 */
	if (s[0] == '\0') {
		/* Empty subject: nothing to rescan, and s[1] must not be read */
		return REG_NOMATCH;
	}
	ofs = 0;
	for (;;) {
		ofs++;
		if (!s[ofs])
			return REG_NOMATCH;
		/* Look at pmatch[] only if this attempt actually matched:
		 * do not rely on its contents after a failed regexec() */
		if (regexec(preg, s + ofs, 1, pmatch, 0) == 0
		 && pmatch[0].rm_eo != 0
		) {
			break;
		}
	}
	/* Offsets are relative to s + ofs, rebase them onto s */
	pmatch[0].rm_so += ofs;
	pmatch[0].rm_eo += ofs;

	return r;
}
1982
1983static int awk_split(const char *s, node *spl, char **slist)
1984{
1985        int n;
1986        char c[4];
1987        char *s1;
1988
1989        /* in worst case, each char would be a separate field */
1990        *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1991        strcpy(s1, s);
1992
1993        c[0] = c[1] = (char)spl->info;
1994        c[2] = c[3] = '\0';
1995        if (*getvar_s(intvar[RS]) == '\0')
1996                c[2] = '\n';
1997
1998        n = 0;
1999        if (spl->info == TI_REGEXP) {  /* regex split */
2000                if (!*s)
2001                        return n; /* "": zero fields */
2002                n++; /* at least one field will be there */
2003                do {
2004                        int l;
2005                        regmatch_t pmatch[1];
2006
2007                        l = strcspn(s, c+2); /* len till next NUL or \n */
2008                        if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0
2009                         && pmatch[0].rm_so <= l
2010                        ) {
2011                                /* if (pmatch[0].rm_eo == 0) ... - impossible */
2012                                l = pmatch[0].rm_so;
2013                                n++; /* we saw yet another delimiter */
2014                        } else {
2015                                pmatch[0].rm_eo = l;
2016                                if (s[l])
2017                                        pmatch[0].rm_eo++;
2018                        }
2019                        s1 = mempcpy(s1, s, l);
2020                        *s1++ = '\0';
2021                        s += pmatch[0].rm_eo;
2022                } while (*s);
2023
2024                /* echo a-- | awk -F-- '{ print NF, length($NF), $NF }'
2025                 * should print "2 0 ":
2026                 */
2027                *s1 = '\0';
2028
2029                return n;
2030        }
2031        if (c[0] == '\0') {  /* null split */
2032                while (*s) {
2033                        *s1++ = *s++;
2034                        *s1++ = '\0';
2035                        n++;
2036                }
2037                return n;
2038        }
2039        if (c[0] != ' ') {  /* single-character split */
2040                if (icase) {
2041                        c[0] = toupper(c[0]);
2042                        c[1] = tolower(c[1]);
2043                }
2044                if (*s1)
2045                        n++;
2046                while ((s1 = strpbrk(s1, c)) != NULL) {
2047                        *s1++ = '\0';
2048                        n++;
2049                }
2050                return n;
2051        }
2052        /* space split */
2053        while (*s) {
2054                s = skip_whitespace(s);
2055                if (!*s)
2056                        break;
2057                n++;
2058                while (*s && !isspace(*s))
2059                        *s1++ = *s++;
2060                *s1++ = '\0';
2061        }
2062        return n;
2063}
2064
2065static void split_f0(void)
2066{
2067/* static char *fstrings; */
2068#define fstrings (G.split_f0__fstrings)
2069
2070        int i, n;
2071        char *s;
2072
2073        if (is_f0_split)
2074                return;
2075
2076        is_f0_split = TRUE;
2077        free(fstrings);
2078        fsrealloc(0);
2079        n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
2080        fsrealloc(n);
2081        s = fstrings;
2082        for (i = 0; i < n; i++) {
2083                Fields[i].string = nextword(&s);
2084                Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
2085        }
2086
2087        /* set NF manually to avoid side effects */
2088        clrvar(intvar[NF]);
2089        intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
2090        intvar[NF]->number = nfields;
2091#undef fstrings
2092}
2093
2094/* perform additional actions when some internal variables changed */
2095static void handle_special(var *v)
2096{
2097        int n;
2098        char *b;
2099        const char *sep, *s;
2100        int sl, l, len, i, bsize;
2101
2102        if (!(v->type & VF_SPECIAL))
2103                return;
2104
2105        if (v == intvar[NF]) {
2106                n = (int)getvar_i(v);
2107                if (n < 0)
2108                        syntax_error("NF set to negative value");
2109                fsrealloc(n);
2110
2111                /* recalculate $0 */
2112                sep = getvar_s(intvar[OFS]);
2113                sl = strlen(sep);
2114                b = NULL;
2115                len = 0;
2116                for (i = 0; i < n; i++) {
2117                        s = getvar_s(&Fields[i]);
2118                        l = strlen(s);
2119                        if (b) {
2120                                memcpy(b+len, sep, sl);
2121                                len += sl;
2122                        }
2123                        b = qrealloc(b, len+l+sl, &bsize);
2124                        memcpy(b+len, s, l);
2125                        len += l;
2126                }
2127                if (b)
2128                        b[len] = '\0';
2129                setvar_p(intvar[F0], b);
2130                is_f0_split = TRUE;
2131
2132        } else if (v == intvar[F0]) {
2133                is_f0_split = FALSE;
2134
2135        } else if (v == intvar[FS]) {
2136                /*
2137                 * The POSIX-2008 standard says that changing FS should have no effect on the
2138                 * current input line, but only on the next one. The language is:
2139                 *
2140                 * > Before the first reference to a field in the record is evaluated, the record
2141                 * > shall be split into fields, according to the rules in Regular Expressions,
2142                 * > using the value of FS that was current at the time the record was read.
2143                 *
2144                 * So, split up current line before assignment to FS:
2145                 */
2146                split_f0();
2147
2148                mk_splitter(getvar_s(v), &fsplitter);
2149        } else if (v == intvar[RS]) {
2150                mk_splitter(getvar_s(v), &rsplitter);
2151        } else if (v == intvar[IGNORECASE]) {
2152                icase = istrue(v);
2153        } else {                                /* $n */
2154                n = getvar_i(intvar[NF]);
2155                setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
2156                /* right here v is invalid. Just to note... */
2157        }
2158}
2159
2160/* step through func/builtin/etc arguments */
2161static node *nextarg(node **pn)
2162{
2163        node *n;
2164
2165        n = *pn;
2166        if (n && n->info == TI_COMMA) {
2167                *pn = n->r.n;
2168                n = n->l.n;
2169        } else {
2170                *pn = NULL;
2171        }
2172        return n;
2173}
2174
2175static void hashwalk_init(var *v, xhash *array)
2176{
2177        hash_item *hi;
2178        unsigned i;
2179        walker_list *w;
2180        walker_list *prev_walker;
2181
2182        if (v->type & VF_WALK) {
2183                prev_walker = v->x.walker;
2184        } else {
2185                v->type |= VF_WALK;
2186                prev_walker = NULL;
2187        }
2188        debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
2189
2190        w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
2191        debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
2192        w->cur = w->end = w->wbuf;
2193        w->prev = prev_walker;
2194        for (i = 0; i < array->csize; i++) {
2195                hi = array->items[i];
2196                while (hi) {
2197                        w->end = stpcpy(w->end, hi->name) + 1;
2198                        hi = hi->next;
2199                }
2200        }
2201}
2202
2203static int hashwalk_next(var *v)
2204{
2205        walker_list *w = v->x.walker;
2206
2207        if (w->cur >= w->end) {
2208                walker_list *prev_walker = w->prev;
2209
2210                debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
2211                free(w);
2212                v->x.walker = prev_walker;
2213                return FALSE;
2214        }
2215
2216        setvar_s(v, nextword(&w->cur));
2217        return TRUE;
2218}
2219
2220/* evaluate node, return 1 when result is true, 0 otherwise */
2221static int ptest(node *pattern)
2222{
2223        // We use a single "static" tmpvar (instead of on-stack or malloced one)
2224        // to decrease memory consumption in deeply-recursive awk programs.
2225        // The rule to work safely is to never call evaluate() while our static
2226        // TMPVAR's value is still needed.
2227        return istrue(evaluate(pattern, &G.ptest__tmpvar));
2228}
2229
2230/* read next record from stream rsm into a variable v */
2231static int awk_getline(rstream *rsm, var *v)
2232{
2233        char *b;
2234        regmatch_t pmatch[1];
2235        int size, a, p, pp = 0;
2236        int fd, so, eo, r, rp;
2237        char c, *m, *s;
2238
2239        debug_printf_eval("entered %s()\n", __func__);
2240
2241        /* we're using our own buffer since we need access to accumulating
2242         * characters
2243         */
2244        fd = fileno(rsm->F);
2245        m = rsm->buffer;
2246        a = rsm->adv;
2247        p = rsm->pos;
2248        size = rsm->size;
2249        c = (char) rsplitter.n.info;
2250        rp = 0;
2251
2252        if (!m)
2253                m = qrealloc(m, 256, &size);
2254
2255        do {
2256                b = m + a;
2257                so = eo = p;
2258                r = 1;
2259                if (p > 0) {
2260                        if (rsplitter.n.info == TI_REGEXP) {
2261                                if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
2262                                                        b, 1, pmatch, 0) == 0) {
2263                                        so = pmatch[0].rm_so;
2264                                        eo = pmatch[0].rm_eo;
2265                                        if (b[eo] != '\0')
2266                                                break;
2267                                }
2268                        } else if (c != '\0') {
2269                                s = strchr(b+pp, c);
2270                                if (!s)
2271                                        s = memchr(b+pp, '\0', p - pp);
2272                                if (s) {
2273                                        so = eo = s-b;
2274                                        eo++;
2275                                        break;
2276                                }
2277                        } else {
2278                                while (b[rp] == '\n')
2279                                        rp++;
2280                                s = strstr(b+rp, "\n\n");
2281                                if (s) {
2282                                        so = eo = s-b;
2283                                        while (b[eo] == '\n')
2284                                                eo++;
2285                                        if (b[eo] != '\0')
2286                                                break;
2287                                }
2288                        }
2289                }
2290
2291                if (a > 0) {
2292                        memmove(m, m+a, p+1);
2293                        b = m;
2294                        a = 0;
2295                }
2296
2297                m = qrealloc(m, a+p+128, &size);
2298                b = m + a;
2299                pp = p;
2300                p += safe_read(fd, b+p, size-p-1);
2301                if (p < pp) {
2302                        p = 0;
2303                        r = 0;
2304                        setvar_i(intvar[ERRNO], errno);
2305                }
2306                b[p] = '\0';
2307
2308        } while (p > pp);
2309
2310        if (p == 0) {
2311                r--;
2312        } else {
2313                c = b[so]; b[so] = '\0';
2314                setvar_s(v, b+rp);
2315                v->type |= VF_USER;
2316                b[so] = c;
2317                c = b[eo]; b[eo] = '\0';
2318                setvar_s(intvar[RT], b+so);
2319                b[eo] = c;
2320        }
2321
2322        rsm->buffer = m;
2323        rsm->adv = a + eo;
2324        rsm->pos = p - eo;
2325        rsm->size = size;
2326
2327        debug_printf_eval("returning from %s(): %d\n", __func__, r);
2328
2329        return r;
2330}
2331
2332/* formatted output into an allocated buffer, return ptr to buffer */
2333#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS
2334# define awk_printf(a, b) awk_printf(a)
2335#endif
2336static char *awk_printf(node *n, size_t *len)
2337{
2338        char *b;
2339        char *fmt, *f;
2340        size_t i;
2341
2342        //tmpvar = nvalloc(1);
2343#define TMPVAR (&G.awk_printf__tmpvar)
2344        // We use a single "static" tmpvar (instead of on-stack or malloced one)
2345        // to decrease memory consumption in deeply-recursive awk programs.
2346        // The rule to work safely is to never call evaluate() while our static
2347        // TMPVAR's value is still needed.
2348        fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), TMPVAR)));
2349        // ^^^^^^^^^ here we immediately strdup() the value, so the later call
2350        // to evaluate() potentially recursing into another awk_printf() can't
2351        // mangle the value.
2352
2353        b = NULL;
2354        i = 0;
2355        while (1) { /* "print one format spec" loop */
2356                char *s;
2357                char c;
2358                char sv;
2359                var *arg;
2360                size_t slen;
2361
2362                /* Find end of the next format spec, or end of line */
2363                s = f;
2364                while (1) {
2365                        c = *f;
2366                        if (!c) /* no percent chars found at all */
2367                                goto nul;
2368                        f++;
2369                        if (c == '%')
2370                                break;
2371                }
2372                /* we are past % in "....%..." */
2373                c = *f;
2374                if (!c) /* "....%" */
2375                        goto nul;
2376                if (c == '%') { /* "....%%...." */
2377                        slen = f - s;
2378                        s = xstrndup(s, slen);
2379                        f++;
2380                        goto append; /* print "....%" part verbatim */
2381                }
2382                while (1) {
2383                        if (isalpha(c))
2384                                break;
2385                        if (c == '*')
2386                                syntax_error("%*x formats are not supported");
2387                        c = *++f;
2388                        if (!c) { /* "....%...." and no letter found after % */
2389                                /* Example: awk 'BEGIN { printf "^^^%^^^\n"; }' */
2390 nul:
2391                                slen = f - s;
2392                                goto tail; /* print remaining string, exit loop */
2393                        }
2394                }
2395                /* we are at A in "....%...A..." */
2396
2397                arg = evaluate(nextarg(&n), TMPVAR);
2398
2399                /* Result can be arbitrarily long. Example:
2400                 *  printf "%99999s", "BOOM"
2401                 */
2402                sv = *++f;
2403                *f = '\0';
2404                if (c == 'c') {
2405                        char cc = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg);
2406                        char *r = xasprintf(s, cc ? cc : '^' /* else strlen will be wrong */);
2407                        slen = strlen(r);
2408                        if (cc == '\0') /* if cc is NUL, re-format the string with it */
2409                                sprintf(r, s, cc);
2410                        s = r;
2411                } else {
2412                        if (c == 's') {
2413                                s = xasprintf(s, getvar_s(arg));
2414                        } else {
2415                                double d = getvar_i(arg);
2416                                if (strchr("diouxX", c)) {
2417//TODO: make it wider here (%x -> %llx etc)?
2418                                        s = xasprintf(s, (int)d);
2419                                } else if (strchr("eEfFgGaA", c)) {
2420                                        s = xasprintf(s, d);
2421                                } else {
2422//TODO: GNU Awk 5.0.1: printf "%W" prints "%W", does not error out
2423                                        syntax_error(EMSG_INV_FMT);
2424                                }
2425                        }
2426                        slen = strlen(s);
2427                }
2428                *f = sv;
2429 append:
2430                if (i == 0) {
2431                        b = s;
2432                        i = slen;
2433                        continue;
2434                }
2435 tail:
2436                b = xrealloc(b, i + slen + 1);
2437                strcpy(b + i, s);
2438                i += slen;
2439                if (!c) /* s is NOT allocated and this is the last part of string? */
2440                        break;
2441                free(s);
2442        }
2443
2444        free(fmt);
2445        //nvfree(tmpvar, 1);
2446#undef TMPVAR
2447
2448#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
2449        if (len)
2450                *len = i;
2451#endif
2452        return b;
2453}
2454
2455/* Common substitution routine.
2456 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2457 * store result into (dest), return number of substitutions.
2458 * If nm = 0, replace all matches.
2459 * If src or dst is NULL, use $0.
2460 * If subexp != 0, enable subexpression matching (\1-\9).
2461 */
2462static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2463{
2464        char *resbuf;
2465        const char *sp;
2466        int match_no, residx, replen, resbufsize;
2467        int regexec_flags;
2468        regmatch_t pmatch[10];
2469        regex_t sreg, *regex;
2470
2471        resbuf = NULL;
2472        residx = 0;
2473        match_no = 0;
2474        regexec_flags = 0;
2475        regex = as_regex(rn, &sreg);
2476        sp = getvar_s(src ? src : intvar[F0]);
2477        replen = strlen(repl);
2478        while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2479                int so = pmatch[0].rm_so;
2480                int eo = pmatch[0].rm_eo;
2481
2482                //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2483                resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2484                memcpy(resbuf + residx, sp, eo);
2485                residx += eo;
2486                if (++match_no >= nm) {
2487                        const char *s;
2488                        int nbs;
2489
2490                        /* replace */
2491                        residx -= (eo - so);
2492                        nbs = 0;
2493                        for (s = repl; *s; s++) {
2494                                char c = resbuf[residx++] = *s;
2495                                if (c == '\\') {
2496                                        nbs++;
2497                                        continue;
2498                                }
2499                                if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2500                                        int j;
2501                                        residx -= ((nbs + 3) >> 1);
2502                                        j = 0;
2503                                        if (c != '&') {
2504                                                j = c - '0';
2505                                                nbs++;
2506                                        }
2507                                        if (nbs % 2) {
2508                                                resbuf[residx++] = c;
2509                                        } else {
2510                                                int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2511                                                resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2512                                                memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2513                                                residx += n;
2514                                        }
2515                                }
2516                                nbs = 0;
2517                        }
2518                }
2519
2520                regexec_flags = REG_NOTBOL;
2521                sp += eo;
2522                if (match_no == nm)
2523                        break;
2524                if (eo == so) {
2525                        /* Empty match (e.g. "b*" will match anywhere).
2526                         * Advance by one char. */
2527//BUG (bug 1333):
2528//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2529//... and will erroneously match "b" even though it is NOT at the word start.
2530//we need REG_NOTBOW but it does not exist...
2531//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2532//it should be able to do it correctly.
2533                        /* Subtle: this is safe only because
2534                         * qrealloc allocated at least one extra byte */
2535                        resbuf[residx] = *sp;
2536                        if (*sp == '\0')
2537                                goto ret;
2538                        sp++;
2539                        residx++;
2540                }
2541        }
2542
2543        resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2544        strcpy(resbuf + residx, sp);
2545 ret:
2546        //bb_error_msg("end sp:'%s'%p", sp,sp);
2547        setvar_p(dest ? dest : intvar[F0], resbuf);
2548        if (regex == &sreg)
2549                regfree(regex);
2550        return match_no;
2551}
2552
2553static NOINLINE int do_mktime(const char *ds)
2554{
2555        struct tm then;
2556        int count;
2557
2558        /*memset(&then, 0, sizeof(then)); - not needed */
2559        then.tm_isdst = -1; /* default is unknown */
2560
2561        /* manpage of mktime says these fields are ints,
2562         * so we can sscanf stuff directly into them */
2563        count = sscanf(ds, "%u %u %u %u %u %u %d",
2564                &then.tm_year, &then.tm_mon, &then.tm_mday,
2565                &then.tm_hour, &then.tm_min, &then.tm_sec,
2566                &then.tm_isdst);
2567
2568        if (count < 6
2569         || (unsigned)then.tm_mon < 1
2570         || (unsigned)then.tm_year < 1900
2571        ) {
2572                return -1;
2573        }
2574
2575        then.tm_mon -= 1;
2576        then.tm_year -= 1900;
2577
2578        return mktime(&then);
2579}
2580
2581/* Reduce stack usage in exec_builtin() by keeping match() code separate */
2582static NOINLINE var *do_match(node *an1, const char *as0)
2583{
2584        regmatch_t pmatch[1];
2585        regex_t sreg, *re;
2586        int n, start, len;
2587
2588        re = as_regex(an1, &sreg);
2589        n = regexec(re, as0, 1, pmatch, 0);
2590        if (re == &sreg)
2591                regfree(re);
2592        start = 0;
2593        len = -1;
2594        if (n == 0) {
2595                start = pmatch[0].rm_so + 1;
2596                len = pmatch[0].rm_eo - pmatch[0].rm_so;
2597        }
2598        setvar_i(newvar("RLENGTH"), len);
2599        return setvar_i(newvar("RSTART"), start);
2600}
2601
2602/* Reduce stack usage in evaluate() by keeping builtins' code separate */
2603static NOINLINE var *exec_builtin(node *op, var *res)
2604{
2605#define tspl (G.exec_builtin__tspl)
2606
2607        var *tmpvars;
2608        node *an[4];
2609        var *av[4];
2610        const char *as[4];
2611        node *spl;
2612        uint32_t isr, info;
2613        int nargs;
2614        time_t tt;
2615        int i, l, ll, n;
2616
2617        tmpvars = nvalloc(4);
2618#define TMPVAR0 (tmpvars)
2619#define TMPVAR1 (tmpvars + 1)
2620#define TMPVAR2 (tmpvars + 2)
2621#define TMPVAR3 (tmpvars + 3)
2622#define TMPVAR(i) (tmpvars + (i))
2623        isr = info = op->info;
2624        op = op->l.n;
2625
2626        av[2] = av[3] = NULL;
2627        for (i = 0; i < 4 && op; i++) {
2628                an[i] = nextarg(&op);
2629                if (isr & 0x09000000) {
2630                        av[i] = evaluate(an[i], TMPVAR(i));
2631                        if (isr & 0x08000000)
2632                                as[i] = getvar_s(av[i]);
2633                }
2634                isr >>= 1;
2635        }
2636
2637        nargs = i;
2638        if ((uint32_t)nargs < (info >> 30))
2639                syntax_error(EMSG_TOO_FEW_ARGS);
2640
2641        info &= OPNMASK;
2642        switch (info) {
2643
2644        case B_a2:
2645                if (ENABLE_FEATURE_AWK_LIBM)
2646                        setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2647                else
2648                        syntax_error(EMSG_NO_MATH);
2649                break;
2650
2651        case B_sp: {
2652                char *s, *s1;
2653
2654                if (nargs > 2) {
2655                        spl = (an[2]->info == TI_REGEXP) ? an[2]
2656                                : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl);
2657                } else {
2658                        spl = &fsplitter.n;
2659                }
2660
2661                n = awk_split(as[0], spl, &s);
2662                s1 = s;
2663                clear_array(iamarray(av[1]));
2664                for (i = 1; i <= n; i++)
2665                        setari_u(av[1], i, nextword(&s));
2666                free(s1);
2667                setvar_i(res, n);
2668                break;
2669        }
2670
2671        case B_ss: {
2672                char *s;
2673
2674                l = strlen(as[0]);
2675                i = getvar_i(av[1]) - 1;
2676                if (i > l)
2677                        i = l;
2678                if (i < 0)
2679                        i = 0;
2680                n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2681                if (n < 0)
2682                        n = 0;
2683                s = xstrndup(as[0]+i, n);
2684                setvar_p(res, s);
2685                break;
2686        }
2687
2688        /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2689         * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2690        case B_an:
2691                setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2692                break;
2693
2694        case B_co:
2695                setvar_i(res, ~getvar_i_int(av[0]));
2696                break;
2697
2698        case B_ls:
2699                setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2700                break;
2701
2702        case B_or:
2703                setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2704                break;
2705
2706        case B_rs:
2707                setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2708                break;
2709
2710        case B_xo:
2711                setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2712                break;
2713
2714        case B_lo:
2715        case B_up: {
2716                char *s, *s1;
2717                s1 = s = xstrdup(as[0]);
2718                while (*s1) {
2719                        //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2720                        if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2721                                *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2722                        s1++;
2723                }
2724                setvar_p(res, s);
2725                break;
2726        }
2727
2728        case B_ix:
2729                n = 0;
2730                ll = strlen(as[1]);
2731                l = strlen(as[0]) - ll;
2732                if (ll > 0 && l >= 0) {
2733                        if (!icase) {
2734                                char *s = strstr(as[0], as[1]);
2735                                if (s)
2736                                        n = (s - as[0]) + 1;
2737                        } else {
2738                                /* this piece of code is terribly slow and
2739                                 * really should be rewritten
2740                                 */
2741                                for (i = 0; i <= l; i++) {
2742                                        if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2743                                                n = i+1;
2744                                                break;
2745                                        }
2746                                }
2747                        }
2748                }
2749                setvar_i(res, n);
2750                break;
2751
2752        case B_ti:
2753                if (nargs > 1)
2754                        tt = getvar_i(av[1]);
2755                else
2756                        time(&tt);
2757                //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2758                i = strftime(g_buf, MAXVARFMT,
2759                        ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2760                        localtime(&tt));
2761                g_buf[i] = '\0';
2762                setvar_s(res, g_buf);
2763                break;
2764
2765        case B_mt:
2766                setvar_i(res, do_mktime(as[0]));
2767                break;
2768
2769        case B_ma:
2770                res = do_match(an[1], as[0]);
2771                break;
2772
2773        case B_ge:
2774                awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2775                break;
2776
2777        case B_gs:
2778                setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2779                break;
2780
2781        case B_su:
2782                setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2783                break;
2784        }
2785
2786        nvfree(tmpvars, 4);
2787#undef TMPVAR0
2788#undef TMPVAR1
2789#undef TMPVAR2
2790#undef TMPVAR3
2791#undef TMPVAR
2792
2793        return res;
2794#undef tspl
2795}
2796
2797/* if expr looks like "var=value", perform assignment and return 1,
2798 * otherwise return 0 */
2799static int is_assignment(const char *expr)
2800{
2801        char *exprc, *val;
2802
2803        val = (char*)endofname(expr);
2804        if (val == (char*)expr || *val != '=') {
2805                return FALSE;
2806        }
2807
2808        exprc = xstrdup(expr);
2809        val = exprc + (val - expr);
2810        *val++ = '\0';
2811
2812        unescape_string_in_place(val);
2813        setvar_u(newvar(exprc), val);
2814        free(exprc);
2815        return TRUE;
2816}
2817
2818/* switch to next input file */
2819static rstream *next_input_file(void)
2820{
2821#define rsm          (G.next_input_file__rsm)
2822#define files_happen (G.next_input_file__files_happen)
2823
2824        const char *fname, *ind;
2825
2826        if (rsm.F)
2827                fclose(rsm.F);
2828        rsm.F = NULL;
2829        rsm.pos = rsm.adv = 0;
2830
2831        for (;;) {
2832                if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2833                        if (files_happen)
2834                                return NULL;
2835                        fname = "-";
2836                        rsm.F = stdin;
2837                        break;
2838                }
2839                ind = getvar_s(incvar(intvar[ARGIND]));
2840                fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2841                if (fname && *fname && !is_assignment(fname)) {
2842                        rsm.F = xfopen_stdin(fname);
2843                        break;
2844                }
2845        }
2846
2847        files_happen = TRUE;
2848        setvar_s(intvar[FILENAME], fname);
2849        return &rsm;
2850#undef rsm
2851#undef files_happen
2852}
2853
2854/*
2855 * Evaluate node - the heart of the program. Supplied with subtree
2856 * and "res" variable to assign the result to if we evaluate an expression.
2857 * If node refers to e.g. a variable or a field, no assignment happens.
2858 * Return ptr to the result (which may or may not be the "res" variable!)
2859 */
2860#define XC(n) ((n) >> 8)
2861
2862static var *evaluate(node *op, var *res)
2863{
2864/* This procedure is recursive so we should count every byte */
2865#define fnargs (G.evaluate__fnargs)
2866/* seed is initialized to 1 */
2867#define seed   (G.evaluate__seed)
2868#define sreg   (G.evaluate__sreg)
2869
2870        var *tmpvars;
2871
2872        if (!op)
2873                return setvar_s(res, NULL);
2874
2875        debug_printf_eval("entered %s()\n", __func__);
2876
2877        tmpvars = nvalloc(2);
2878#define TMPVAR0 (tmpvars)
2879#define TMPVAR1 (tmpvars + 1)
2880
2881        while (op) {
2882                struct {
2883                        var *v;
2884                        const char *s;
2885                } L = L; /* for compiler */
2886                struct {
2887                        var *v;
2888                        const char *s;
2889                } R = R;
2890                double L_d = L_d;
2891                uint32_t opinfo;
2892                int opn;
2893                node *op1;
2894
2895                opinfo = op->info;
2896                opn = (opinfo & OPNMASK);
2897                g_lineno = op->lineno;
2898                op1 = op->l.n;
2899                debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2900
2901                /* execute inevitable things */
2902                if (opinfo & OF_RES1) {
2903                        if ((opinfo & OF_REQUIRED) && !op1)
2904                                syntax_error(EMSG_TOO_FEW_ARGS);
2905                        L.v = evaluate(op1, TMPVAR0);
2906                        if (opinfo & OF_STR1) {
2907                                L.s = getvar_s(L.v);
2908                                debug_printf_eval("L.s:'%s'\n", L.s);
2909                        }
2910                        if (opinfo & OF_NUM1) {
2911                                L_d = getvar_i(L.v);
2912                                debug_printf_eval("L_d:%f\n", L_d);
2913                        }
2914                }
2915                /* NB: Must get string/numeric values of L (done above)
2916                 * _before_ evaluate()'ing R.v: if both L and R are $NNNs,
2917                 * and right one is large, then L.v points to Fields[NNN1],
2918                 * second evaluate() reallocates and moves (!) Fields[],
2919                 * R.v points to Fields[NNN2] but L.v now points to freed mem!
2920                 * (Seen trying to evaluate "$444 $44444")
2921                 */
2922                if (opinfo & OF_RES2) {
2923                        R.v = evaluate(op->r.n, TMPVAR1);
2924                        //TODO: L.v may be invalid now, set L.v to NULL to catch bugs?
2925                        //L.v = NULL;
2926                        if (opinfo & OF_STR2) {
2927                                R.s = getvar_s(R.v);
2928                                debug_printf_eval("R.s:'%s'\n", R.s);
2929                        }
2930                }
2931
2932                debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2933                switch (XC(opinfo & OPCLSMASK)) {
2934
2935                /* -- iterative node type -- */
2936
2937                /* test pattern */
2938                case XC( OC_TEST ):
2939                        debug_printf_eval("TEST\n");
2940                        if (op1->info == TI_COMMA) {
2941                                /* it's range pattern */
2942                                if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2943                                        op->info |= OF_CHECKED;
2944                                        if (ptest(op1->r.n))
2945                                                op->info &= ~OF_CHECKED;
2946                                        op = op->a.n;
2947                                } else {
2948                                        op = op->r.n;
2949                                }
2950                        } else {
2951                                op = ptest(op1) ? op->a.n : op->r.n;
2952                        }
2953                        break;
2954
2955                /* just evaluate an expression, also used as unconditional jump */
2956                case XC( OC_EXEC ):
2957                        debug_printf_eval("EXEC\n");
2958                        break;
2959
2960                /* branch, used in if-else and various loops */
2961                case XC( OC_BR ):
2962                        debug_printf_eval("BR\n");
2963                        op = istrue(L.v) ? op->a.n : op->r.n;
2964                        break;
2965
2966                /* initialize for-in loop */
2967                case XC( OC_WALKINIT ):
2968                        debug_printf_eval("WALKINIT\n");
2969                        hashwalk_init(L.v, iamarray(R.v));
2970                        break;
2971
2972                /* get next array item */
2973                case XC( OC_WALKNEXT ):
2974                        debug_printf_eval("WALKNEXT\n");
2975                        op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2976                        break;
2977
2978                case XC( OC_PRINT ):
2979                        debug_printf_eval("PRINT /\n");
2980                case XC( OC_PRINTF ):
2981                        debug_printf_eval("PRINTF\n");
2982                {
2983                        FILE *F = stdout;
2984
2985                        if (op->r.n) {
2986                                rstream *rsm = newfile(R.s);
2987                                if (!rsm->F) {
2988                                        if (opn == '|') {
2989                                                rsm->F = popen(R.s, "w");
2990                                                if (rsm->F == NULL)
2991                                                        bb_simple_perror_msg_and_die("popen");
2992                                                rsm->is_pipe = 1;
2993                                        } else {
2994                                                rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2995                                        }
2996                                }
2997                                F = rsm->F;
2998                        }
2999
3000                        /* Can't just check 'opinfo == OC_PRINT' here, parser ORs
3001                         * additional bits to opinfos of print/printf with redirects
3002                         */
3003                        if ((opinfo & OPCLSMASK) == OC_PRINT) {
3004                                if (!op1) {
3005                                        fputs(getvar_s(intvar[F0]), F);
3006                                } else {
3007                                        for (;;) {
3008                                                var *v = evaluate(nextarg(&op1), TMPVAR0);
3009                                                if (v->type & VF_NUMBER) {
3010                                                        fmt_num(getvar_s(intvar[OFMT]),
3011                                                                        getvar_i(v));
3012                                                        fputs(g_buf, F);
3013                                                } else {
3014                                                        fputs(getvar_s(v), F);
3015                                                }
3016                                                if (!op1)
3017                                                        break;
3018                                                fputs(getvar_s(intvar[OFS]), F);
3019                                        }
3020                                }
3021                                fputs(getvar_s(intvar[ORS]), F);
3022                        } else {        /* PRINTF */
3023                                IF_FEATURE_AWK_GNU_EXTENSIONS(size_t len;)
3024                                char *s = awk_printf(op1, &len);
3025#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3026                                fwrite(s, len, 1, F);
3027#else
3028                                fputs(s, F);
3029#endif
3030                                free(s);
3031                        }
3032                        fflush(F);
3033                        break;
3034                }
3035
3036                case XC( OC_DELETE ):
3037                        debug_printf_eval("DELETE\n");
3038                {
3039                        /* "delete" is special:
3040                         * "delete array[var--]" must evaluate index expr only once.
3041                         */
3042                        uint32_t info = op1->info & OPCLSMASK;
3043                        var *v;
3044
3045                        if (info == OC_VAR) {
3046                                v = op1->l.v;
3047                        } else if (info == OC_FNARG) {
3048                                v = &fnargs[op1->l.aidx];
3049                        } else {
3050                                syntax_error(EMSG_NOT_ARRAY);
3051                        }
3052                        if (op1->r.n) { /* array ref? */
3053                                const char *s;
3054                                s = getvar_s(evaluate(op1->r.n, TMPVAR0));
3055                                hash_remove(iamarray(v), s);
3056                        } else {
3057                                clear_array(iamarray(v));
3058                        }
3059                        break;
3060                }
3061
3062                case XC( OC_NEWSOURCE ):
3063                        debug_printf_eval("NEWSOURCE\n");
3064                        g_progname = op->l.new_progname;
3065                        break;
3066
3067                case XC( OC_RETURN ):
3068                        debug_printf_eval("RETURN\n");
3069                        copyvar(res, L.v);
3070                        break;
3071
3072                case XC( OC_NEXTFILE ):
3073                        debug_printf_eval("NEXTFILE\n");
3074                        nextfile = TRUE;
3075                case XC( OC_NEXT ):
3076                        debug_printf_eval("NEXT\n");
3077                        nextrec = TRUE;
3078                case XC( OC_DONE ):
3079                        debug_printf_eval("DONE\n");
3080                        clrvar(res);
3081                        break;
3082
3083                case XC( OC_EXIT ):
3084                        debug_printf_eval("EXIT\n");
3085                        if (op1)
3086                                G.exitcode = (int)L_d;
3087                        awk_exit();
3088
3089                /* -- recursive node type -- */
3090
3091                case XC( OC_VAR ):
3092                        debug_printf_eval("VAR\n");
3093                        L.v = op->l.v;
3094                        if (L.v == intvar[NF])
3095                                split_f0();
3096                        goto v_cont;
3097
3098                case XC( OC_FNARG ):
3099                        debug_printf_eval("FNARG[%d]\n", op->l.aidx);
3100                        L.v = &fnargs[op->l.aidx];
3101 v_cont:
3102                        res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
3103                        break;
3104
3105                case XC( OC_IN ):
3106                        debug_printf_eval("IN\n");
3107                        setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
3108                        break;
3109
3110                case XC( OC_REGEXP ):
3111                        debug_printf_eval("REGEXP\n");
3112                        op1 = op;
3113                        L.s = getvar_s(intvar[F0]);
3114                        goto re_cont;
3115
3116                case XC( OC_MATCH ):
3117                        debug_printf_eval("MATCH\n");
3118                        op1 = op->r.n;
3119 re_cont:
3120                        {
3121                                regex_t *re = as_regex(op1, &sreg);
3122                                int i = regexec(re, L.s, 0, NULL, 0);
3123                                if (re == &sreg)
3124                                        regfree(re);
3125                                setvar_i(res, (i == 0) ^ (opn == '!'));
3126                        }
3127                        break;
3128
3129                case XC( OC_MOVE ):
3130                        debug_printf_eval("MOVE\n");
3131                        /* make sure that we never return a temp var */
3132                        if (L.v == TMPVAR0)
3133                                L.v = res;
3134                        /* if source is a temporary string, just relink it to dest */
3135                        if (R.v == TMPVAR1
3136                         && !(R.v->type & VF_NUMBER)
3137                                /* Why check !NUMBER? if R.v is a number but has cached R.v->string,
3138                                 * L.v ends up a string, which is wrong */
3139                         /*&& R.v->string - always not NULL (right?) */
3140                        ) {
3141                                res = setvar_p(L.v, R.v->string); /* avoids strdup */
3142                                R.v->string = NULL;
3143                        } else {
3144                                res = copyvar(L.v, R.v);
3145                        }
3146                        break;
3147
3148                case XC( OC_TERNARY ):
3149                        debug_printf_eval("TERNARY\n");
3150                        if (op->r.n->info != TI_COLON)
3151                                syntax_error(EMSG_POSSIBLE_ERROR);
3152                        res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
3153                        break;
3154
3155                case XC( OC_FUNC ): {
3156                        var *argvars, *sv_fnargs;
3157                        const char *sv_progname;
3158                        int nargs, i;
3159
3160                        debug_printf_eval("FUNC\n");
3161
3162                        if (!op->r.f->defined)
3163                                syntax_error(EMSG_UNDEF_FUNC);
3164
3165                        /* The body might be empty, still has to eval the args */
3166                        nargs = op->r.f->nargs;
3167                        argvars = nvalloc(nargs);
3168                        i = 0;
3169                        while (op1) {
3170                                var *arg = evaluate(nextarg(&op1), TMPVAR0);
3171                                if (i == nargs) {
3172                                        /* call with more arguments than function takes.
3173                                         * (gawk warns: "warning: function 'f' called with more arguments than declared").
3174                                         * They are still evaluated, but discarded: */
3175                                        clrvar(arg);
3176                                        continue;
3177                                }
3178                                copyvar(&argvars[i], arg);
3179                                argvars[i].type |= VF_CHILD;
3180                                argvars[i].x.parent = arg;
3181                                i++;
3182                        }
3183
3184                        sv_fnargs = fnargs;
3185                        sv_progname = g_progname;
3186
3187                        fnargs = argvars;
3188                        res = evaluate(op->r.f->body.first, res);
3189                        nvfree(argvars, nargs);
3190
3191                        g_progname = sv_progname;
3192                        fnargs = sv_fnargs;
3193
3194                        break;
3195                }
3196
3197                case XC( OC_GETLINE ):
3198                        debug_printf_eval("GETLINE /\n");
3199                case XC( OC_PGETLINE ):
3200                        debug_printf_eval("PGETLINE\n");
3201                {
3202                        rstream *rsm;
3203                        int i;
3204
3205                        if (op1) {
3206                                rsm = newfile(L.s);
3207                                if (!rsm->F) {
3208                                        /* NB: can't use "opinfo == TI_PGETLINE", would break "cmd" | getline */
3209                                        if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
3210                                                rsm->F = popen(L.s, "r");
3211                                                rsm->is_pipe = TRUE;
3212                                        } else {
3213                                                rsm->F = fopen_for_read(L.s);  /* not xfopen! */
3214                                        }
3215                                }
3216                        } else {
3217                                if (!iF)
3218                                        iF = next_input_file();
3219                                rsm = iF;
3220                        }
3221
3222                        if (!rsm || !rsm->F) {
3223                                setvar_i(intvar[ERRNO], errno);
3224                                setvar_i(res, -1);
3225                                break;
3226                        }
3227
3228                        if (!op->r.n)
3229                                R.v = intvar[F0];
3230
3231                        i = awk_getline(rsm, R.v);
3232                        if (i > 0 && !op1) {
3233                                incvar(intvar[FNR]);
3234                                incvar(intvar[NR]);
3235                        }
3236                        setvar_i(res, i);
3237                        break;
3238                }
3239
3240                /* simple builtins */
3241                case XC( OC_FBLTIN ): {
3242                        double R_d = R_d; /* for compiler */
3243                        debug_printf_eval("FBLTIN\n");
3244
3245                        if (op1 && op1->info == TI_COMMA)
3246                                /* Simple builtins take one arg maximum */
3247                                syntax_error("Too many arguments");
3248
3249                        switch (opn) {
3250                        case F_in:
3251                                R_d = (long long)L_d;
3252                                break;
3253
3254                        case F_rn: /*rand*/
3255                                if (op1)
3256                                        syntax_error("Too many arguments");
3257                        {
3258#if RAND_MAX >= 0x7fffffff
3259                                uint32_t u = ((uint32_t)rand() << 16) ^ rand();
3260                                uint64_t v = ((uint64_t)rand() << 32) | u;
3261                                /* the above shift+or is optimized out on 32-bit arches */
3262# if RAND_MAX > 0x7fffffff
3263                                v &= 0x7fffffffffffffffULL;
3264# endif
3265                                R_d = (double)v / 0x8000000000000000ULL;
3266#else
3267# error Not implemented for this value of RAND_MAX
3268#endif
3269                                break;
3270                        }
3271                        case F_co:
3272                                if (ENABLE_FEATURE_AWK_LIBM) {
3273                                        R_d = cos(L_d);
3274                                        break;
3275                                }
3276
3277                        case F_ex:
3278                                if (ENABLE_FEATURE_AWK_LIBM) {
3279                                        R_d = exp(L_d);
3280                                        break;
3281                                }
3282
3283                        case F_lg:
3284                                if (ENABLE_FEATURE_AWK_LIBM) {
3285                                        R_d = log(L_d);
3286                                        break;
3287                                }
3288
3289                        case F_si:
3290                                if (ENABLE_FEATURE_AWK_LIBM) {
3291                                        R_d = sin(L_d);
3292                                        break;
3293                                }
3294
3295                        case F_sq:
3296                                if (ENABLE_FEATURE_AWK_LIBM) {
3297                                        R_d = sqrt(L_d);
3298                                        break;
3299                                }
3300
3301                                syntax_error(EMSG_NO_MATH);
3302                                break;
3303
3304                        case F_sr:
3305                                R_d = (double)seed;
3306                                seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
3307                                srand(seed);
3308                                break;
3309
3310                        case F_ti: /*systime*/
3311                                if (op1)
3312                                        syntax_error("Too many arguments");
3313                                R_d = time(NULL);
3314                                break;
3315
3316                        case F_le:
3317                                debug_printf_eval("length: L.s:'%s'\n", L.s);
3318                                if (!op1) {
3319                                        L.s = getvar_s(intvar[F0]);
3320                                        debug_printf_eval("length: L.s='%s'\n", L.s);
3321                                }
3322                                else if (L.v->type & VF_ARRAY) {
3323                                        R_d = L.v->x.array->nel;
3324                                        debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
3325                                        break;
3326                                }
3327                                R_d = strlen(L.s);
3328                                break;
3329
3330                        case F_sy:
3331                                fflush_all();
3332                                R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
3333                                                ? (system(L.s) >> 8) : 0;
3334                                break;
3335
3336                        case F_ff:
3337                                if (!op1) {
3338                                        fflush(stdout);
3339                                } else if (L.s && *L.s) {
3340                                        rstream *rsm = newfile(L.s);
3341                                        fflush(rsm->F);
3342                                } else {
3343                                        fflush_all();
3344                                }
3345                                break;
3346
3347                        case F_cl: {
3348                                rstream *rsm;
3349                                int err = 0;
3350                                rsm = (rstream *)hash_search(fdhash, L.s);
3351                                debug_printf_eval("OC_FBLTIN close: op1:%p s:'%s' rsm:%p\n", op1, L.s, rsm);
3352                                if (rsm) {
3353                                        debug_printf_eval("OC_FBLTIN F_cl "
3354                                                "rsm->is_pipe:%d, ->F:%p\n",
3355                                                rsm->is_pipe, rsm->F);
3356                                        /* Can be NULL if open failed. Example:
3357                                         * getline line <"doesnt_exist";
3358                                         * close("doesnt_exist"); <--- here rsm->F is NULL
3359                                         */
3360                                        if (rsm->F)
3361                                                err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
3362//TODO: fix this case:
3363// $ awk 'BEGIN { print close(""); print ERRNO }'
3364// -1
3365// close of redirection that was never opened
3366// (we print 0, 0)
3367                                        free(rsm->buffer);
3368                                        hash_remove(fdhash, L.s);
3369                                }
3370                                if (err)
3371                                        setvar_i(intvar[ERRNO], errno);
3372                                R_d = (double)err;
3373                                break;
3374                        }
3375                        } /* switch */
3376                        setvar_i(res, R_d);
3377                        break;
3378                }
3379
3380                case XC( OC_BUILTIN ):
3381                        debug_printf_eval("BUILTIN\n");
3382                        res = exec_builtin(op, res);
3383                        break;
3384
3385                case XC( OC_SPRINTF ):
3386                        debug_printf_eval("SPRINTF\n");
3387                        setvar_p(res, awk_printf(op1, NULL));
3388                        break;
3389
3390                case XC( OC_UNARY ):
3391                        debug_printf_eval("UNARY\n");
3392                {
3393                        double Ld, R_d;
3394
3395                        Ld = R_d = getvar_i(R.v);
3396                        switch (opn) {
3397                        case 'P':
3398                                Ld = ++R_d;
3399                                goto r_op_change;
3400                        case 'p':
3401                                R_d++;
3402                                goto r_op_change;
3403                        case 'M':
3404                                Ld = --R_d;
3405                                goto r_op_change;
3406                        case 'm':
3407                                R_d--;
3408 r_op_change:
3409                                setvar_i(R.v, R_d);
3410                                break;
3411                        case '!':
3412                                Ld = !istrue(R.v);
3413                                break;
3414                        case '-':
3415                                Ld = -R_d;
3416                                break;
3417                        }
3418                        setvar_i(res, Ld);
3419                        break;
3420                }
3421
3422                case XC( OC_FIELD ):
3423                        debug_printf_eval("FIELD\n");
3424                {
3425                        int i = (int)getvar_i(R.v);
3426                        if (i < 0)
3427                                syntax_error(EMSG_NEGATIVE_FIELD);
3428                        if (i == 0) {
3429                                res = intvar[F0];
3430                        } else {
3431                                split_f0();
3432                                if (i > nfields)
3433                                        fsrealloc(i);
3434                                res = &Fields[i - 1];
3435                        }
3436                        break;
3437                }
3438
3439                /* concatenation (" ") and index joining (",") */
3440                case XC( OC_CONCAT ):
3441                        debug_printf_eval("CONCAT /\n");
3442                case XC( OC_COMMA ): {
3443                        const char *sep = "";
3444                        debug_printf_eval("COMMA\n");
3445                        if (opinfo == TI_COMMA)
3446                                sep = getvar_s(intvar[SUBSEP]);
3447                        setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
3448                        break;
3449                }
3450
3451                case XC( OC_LAND ):
3452                        debug_printf_eval("LAND\n");
3453                        setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
3454                        break;
3455
3456                case XC( OC_LOR ):
3457                        debug_printf_eval("LOR\n");
3458                        setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
3459                        break;
3460
3461                case XC( OC_BINARY ):
3462                        debug_printf_eval("BINARY /\n");
3463                case XC( OC_REPLACE ):
3464                        debug_printf_eval("REPLACE\n");
3465                {
3466                        double R_d = getvar_i(R.v);
3467                        debug_printf_eval("R_d:%f opn:%c\n", R_d, opn);
3468                        switch (opn) {
3469                        case '+':
3470                                L_d += R_d;
3471                                break;
3472                        case '-':
3473                                L_d -= R_d;
3474                                break;
3475                        case '*':
3476                                L_d *= R_d;
3477                                break;
3478                        case '/':
3479                                if (R_d == 0)
3480                                        syntax_error(EMSG_DIV_BY_ZERO);
3481                                L_d /= R_d;
3482                                break;
3483                        case '&':
3484                                if (ENABLE_FEATURE_AWK_LIBM)
3485                                        L_d = pow(L_d, R_d);
3486                                else
3487                                        syntax_error(EMSG_NO_MATH);
3488                                break;
3489                        case '%':
3490                                if (R_d == 0)
3491                                        syntax_error(EMSG_DIV_BY_ZERO);
3492                                L_d -= (long long)(L_d / R_d) * R_d;
3493                                break;
3494                        }
3495                        debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
3496                        res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
3497                        break;
3498                }
3499
3500                case XC( OC_COMPARE ): {
3501                        int i = i; /* for compiler */
3502                        double Ld;
3503                        debug_printf_eval("COMPARE\n");
3504
3505                        if (is_numeric(L.v) && is_numeric(R.v)) {
3506                                Ld = getvar_i(L.v) - getvar_i(R.v);
3507                        } else {
3508                                const char *l = getvar_s(L.v);
3509                                const char *r = getvar_s(R.v);
3510                                Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
3511                        }
3512                        switch (opn & 0xfe) {
3513                        case 0:
3514                                i = (Ld > 0);
3515                                break;
3516                        case 2:
3517                                i = (Ld >= 0);
3518                                break;
3519                        case 4:
3520                                i = (Ld == 0);
3521                                break;
3522                        }
3523                        debug_printf_eval("COMPARE result: %d\n", (i == 0) ^ (opn & 1));
3524                        setvar_i(res, (i == 0) ^ (opn & 1));
3525                        break;
3526                }
3527
3528                default:
3529                        syntax_error(EMSG_POSSIBLE_ERROR);
3530                } /* switch */
3531
3532                if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3533                        op = op->a.n;
3534                if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3535                        break;
3536                if (nextrec)
3537                        break;
3538        } /* while (op) */
3539
3540        nvfree(tmpvars, 2);
3541#undef TMPVAR0
3542#undef TMPVAR1
3543
3544        debug_printf_eval("returning from %s(): %p\n", __func__, res);
3545        return res;
3546#undef fnargs
3547#undef seed
3548#undef sreg
3549}
3550
3551/* -------- main & co. -------- */
3552
3553static int awk_exit(void)
3554{
3555        unsigned i;
3556
3557        if (!exiting) {
3558                exiting = TRUE;
3559                nextrec = FALSE;
3560                evaluate(endseq.first, &G.exit__tmpvar);
3561        }
3562
3563        /* waiting for children */
3564        for (i = 0; i < fdhash->csize; i++) {
3565                hash_item *hi;
3566                hi = fdhash->items[i];
3567                while (hi) {
3568                        if (hi->data.rs.F && hi->data.rs.is_pipe)
3569                                pclose(hi->data.rs.F);
3570                        hi = hi->next;
3571                }
3572        }
3573
3574        exit(G.exitcode);
3575}
3576
3577int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3578int awk_main(int argc UNUSED_PARAM, char **argv)
3579{
3580        unsigned opt;
3581        char *opt_F;
3582        llist_t *list_v = NULL;
3583        llist_t *list_f = NULL;
3584#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3585        llist_t *list_e = NULL;
3586#endif
3587        int i;
3588
3589        INIT_G();
3590
3591        /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3592         * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3593        if (ENABLE_LOCALE_SUPPORT)
3594                setlocale(LC_NUMERIC, "C");
3595
3596        /* initialize variables */
3597        vhash = hash_init();
3598        {
3599                char *vnames = (char *)vNames; /* cheat */
3600                char *vvalues = (char *)vValues;
3601                for (i = 0; *vnames; i++) {
3602                        var *v;
3603                        intvar[i] = v = newvar(nextword(&vnames));
3604                        if (*vvalues != '\377')
3605                                setvar_s(v, nextword(&vvalues));
3606                        else
3607                                setvar_i(v, 0);
3608
3609                        if (*vnames == '*') {
3610                                v->type |= VF_SPECIAL;
3611                                vnames++;
3612                        }
3613                }
3614        }
3615
3616        handle_special(intvar[FS]);
3617        handle_special(intvar[RS]);
3618
3619        /* Huh, people report that sometimes environ is NULL. Oh well. */
3620        if (environ) {
3621                char **envp;
3622                for (envp = environ; *envp; envp++) {
3623                        /* environ is writable, thus we don't strdup it needlessly */
3624                        char *s = *envp;
3625                        char *s1 = strchr(s, '=');
3626                        if (s1) {
3627                                *s1 = '\0';
3628                                /* Both findvar and setvar_u take const char*
3629                                 * as 2nd arg -> environment is not trashed */
3630                                setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3631                                *s1 = '=';
3632                        }
3633                }
3634        }
3635        opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3636        argv += optind;
3637        //argc -= optind;
3638        if (opt & OPT_W)
3639                bb_simple_error_msg("warning: option -W is ignored");
3640        if (opt & OPT_F) {
3641                unescape_string_in_place(opt_F);
3642                setvar_s(intvar[FS], opt_F);
3643        }
3644        while (list_v) {
3645                if (!is_assignment(llist_pop(&list_v)))
3646                        bb_show_usage();
3647        }
3648
3649        /* Parse all supplied programs */
3650        fnhash = hash_init();
3651        ahash = hash_init();
3652        while (list_f) {
3653                int fd;
3654                char *s;
3655
3656                g_progname = llist_pop(&list_f);
3657                fd = xopen_stdin(g_progname);
3658                s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
3659                close(fd);
3660                parse_program(s);
3661                free(s);
3662        }
3663        g_progname = "cmd. line";
3664#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3665        while (list_e) {
3666                parse_program(llist_pop(&list_e));
3667        }
3668#endif
3669//FIXME: preserve order of -e and -f
3670//TODO: implement -i LIBRARY and -E FILE too, they are easy-ish
3671        if (!(opt & (OPT_f | OPT_e))) {
3672                if (!*argv)
3673                        bb_show_usage();
3674                parse_program(*argv++);
3675        }
3676        /* Free unused parse structures */
3677        //hash_free(fnhash); // ~250 bytes when empty, used only for function names
3678        //^^^^^^^^^^^^^^^^^ does not work, hash_clear() inside SEGVs
3679        // (IOW: hash_clear() assumes it's a hash of variables. fnhash is not).
3680        free(fnhash->items);
3681        free(fnhash);
3682        fnhash = NULL; // debug
3683        //hash_free(ahash); // empty after parsing, will reuse as fdhash instead of freeing
3684
3685        /* Parsing done, on to executing */
3686
3687        /* fill in ARGV array */
3688        setari_u(intvar[ARGV], 0, "awk");
3689        i = 0;
3690        while (*argv)
3691                setari_u(intvar[ARGV], ++i, *argv++);
3692        setvar_i(intvar[ARGC], i + 1);
3693
3694        //fdhash = ahash; // done via define
3695        newfile("/dev/stdin")->F = stdin;
3696        newfile("/dev/stdout")->F = stdout;
3697        newfile("/dev/stderr")->F = stderr;
3698
3699        evaluate(beginseq.first, &G.main__tmpvar);
3700        if (!mainseq.first && !endseq.first)
3701                awk_exit();
3702
3703        /* input file could already be opened in BEGIN block */
3704        if (!iF)
3705                iF = next_input_file();
3706
3707        /* passing through input files */
3708        while (iF) {
3709                nextfile = FALSE;
3710                setvar_i(intvar[FNR], 0);
3711
3712                while ((i = awk_getline(iF, intvar[F0])) > 0) {
3713                        nextrec = FALSE;
3714                        incvar(intvar[NR]);
3715                        incvar(intvar[FNR]);
3716                        evaluate(mainseq.first, &G.main__tmpvar);
3717
3718                        if (nextfile)
3719                                break;
3720                }
3721
3722                if (i < 0)
3723                        syntax_error(strerror(errno));
3724
3725                iF = next_input_file();
3726        }
3727
3728        awk_exit();
3729        /*return 0;*/
3730}
3731