busybox/editors/awk.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * awk implementation for busybox
   4 *
   5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
   6 *
   7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
   8 */
   9//config:config AWK
  10//config:       bool "awk (23 kb)"
  11//config:       default y
  12//config:       help
  13//config:       Awk is used as a pattern scanning and processing language.
  14//config:
  15//config:config FEATURE_AWK_LIBM
  16//config:       bool "Enable math functions (requires libm)"
  17//config:       default y
  18//config:       depends on AWK
  19//config:       help
  20//config:       Enable math functions of the Awk programming language.
  21//config:       NOTE: This requires libm to be present for linking.
  22//config:
  23//config:config FEATURE_AWK_GNU_EXTENSIONS
  24//config:       bool "Enable a few GNU extensions"
  25//config:       default y
  26//config:       depends on AWK
  27//config:       help
  28//config:       Enable a few features from gawk:
  29//config:       * command line option -e AWK_PROGRAM
  30//config:       * simultaneous use of -f and -e on the command line.
  31//config:       This enables the use of awk library files.
  32//config:       Example: awk -f mylib.awk -e '{print myfunction($1);}' ...
  33
  34//applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk))
  35
  36//kbuild:lib-$(CONFIG_AWK) += awk.o
  37
  38//usage:#define awk_trivial_usage
  39//usage:       "[OPTIONS] [AWK_PROGRAM] [FILE]..."
  40//usage:#define awk_full_usage "\n\n"
  41//usage:       "        -v VAR=VAL      Set variable"
  42//usage:     "\n        -F SEP          Use SEP as field separator"
  43//usage:     "\n        -f FILE         Read program from FILE"
  44//usage:        IF_FEATURE_AWK_GNU_EXTENSIONS(
  45//usage:     "\n        -e AWK_PROGRAM"
  46//usage:        )
  47
  48#include "libbb.h"
  49#include "xregex.h"
  50#include <math.h>
  51
  52/* This is a NOEXEC applet. Be very careful! */
  53
  54
  55/* If you comment out one of these below, it will be #defined later
  56 * to perform debug printfs to stderr: */
  57#define debug_printf_walker(...)  do {} while (0)
  58#define debug_printf_eval(...)  do {} while (0)
  59#define debug_printf_parse(...)  do {} while (0)
  60
  61#ifndef debug_printf_walker
  62# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
  63#endif
  64#ifndef debug_printf_eval
  65# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
  66#endif
  67#ifndef debug_printf_parse
  68# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
  69#else
  70# define debug_parse_print_tc(...) ((void)0)
  71#endif
  72
  73
  74/* "+": stop on first non-option:
  75 * $ awk 'BEGIN { for(i=1; i<ARGC; ++i) { print i ": " ARGV[i] }}' -argz
  76 * 1: -argz
  77 */
  78#define OPTSTR_AWK "+" \
  79        "F:v:*f:*" \
  80        IF_FEATURE_AWK_GNU_EXTENSIONS("e:*") \
  81        "W:"
  82enum {
  83        OPTBIT_F,       /* define field separator */
  84        OPTBIT_v,       /* define variable */
  85        OPTBIT_f,       /* pull in awk program from file */
  86        IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,) /* -e AWK_PROGRAM */
  87        OPTBIT_W,       /* -W ignored */
  88        OPT_F = 1 << OPTBIT_F,
  89        OPT_v = 1 << OPTBIT_v,
  90        OPT_f = 1 << OPTBIT_f,
  91        OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
  92        OPT_W = 1 << OPTBIT_W
  93};
  94
  95#define MAXVARFMT       240
  96
  97/* variable flags */
  98#define VF_NUMBER       0x0001  /* 1 = primary type is number */
  99#define VF_ARRAY        0x0002  /* 1 = it's an array */
 100
 101#define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
 102#define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
 103#define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
 104#define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
 105#define VF_FSTR         0x1000  /* 1 = don't free() var::string (not malloced, or is owned by something else) */
 106#define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
 107#define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
 108
 109/* these flags are static, don't change them when value is changed */
 110#define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
 111
 112typedef struct walker_list {
 113        char *end;
 114        char *cur;
 115        struct walker_list *prev;
 116        char wbuf[1];
 117} walker_list;
 118
 119/* Variable */
 120typedef struct var_s {
 121        unsigned type;            /* flags */
 122        char *string;
 123        double number;
 124        union {
 125                int aidx;               /* func arg idx (for compilation stage) */
 126                struct xhash_s *array;  /* array ptr */
 127                struct var_s *parent;   /* for func args, ptr to actual parameter */
 128                walker_list *walker;    /* list of array elements (for..in) */
 129        } x;
 130} var;
 131
 132/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
 133typedef struct chain_s {
 134        struct node_s *first;
 135        struct node_s *last;
 136        const char *programname;
 137} chain;
 138
 139/* Function */
 140typedef struct func_s {
 141        unsigned nargs;
 142        smallint defined;
 143        struct chain_s body;
 144} func;
 145
 146/* I/O stream */
 147typedef struct rstream_s {
 148        FILE *F;
 149        char *buffer;
 150        int adv;
 151        int size;
 152        int pos;
 153        smallint is_pipe;
 154} rstream;
 155
 156typedef struct hash_item_s {
 157        union {
 158                struct var_s v;         /* variable/array hash */
 159                struct rstream_s rs;    /* redirect streams hash */
 160                struct func_s f;        /* functions hash */
 161        } data;
 162        struct hash_item_s *next;       /* next in chain */
 163        char name[1];                   /* really it's longer */
 164} hash_item;
 165
 166typedef struct xhash_s {
 167        unsigned nel;           /* num of elements */
 168        unsigned csize;         /* current hash size */
 169        unsigned nprime;        /* next hash size in PRIMES[] */
 170        unsigned glen;          /* summary length of item names */
 171        struct hash_item_s **items;
 172} xhash;
 173
 174/* Tree node */
 175typedef struct node_s {
 176        uint32_t info;
 177        unsigned lineno;
 178        union {
 179                struct node_s *n;
 180                var *v;
 181                int aidx;
 182                const char *new_progname;
 183                regex_t *re;
 184        } l;
 185        union {
 186                struct node_s *n;
 187                regex_t *ire;
 188                func *f;
 189        } r;
 190        union {
 191                struct node_s *n;
 192        } a;
 193} node;
 194
 195typedef struct tsplitter_s {
 196        node n;
 197        regex_t re[2];
 198} tsplitter;
 199
 200/* simple token classes */
 201/* order and hex values are very important!!!  See next_token() */
 202#define TC_LPAREN       (1 << 0)        /* ( */
 203#define TC_RPAREN       (1 << 1)        /* ) */
 204#define TC_REGEXP       (1 << 2)        /* /.../ */
 205#define TC_OUTRDR       (1 << 3)        /* | > >> */
 206#define TC_UOPPOST      (1 << 4)        /* unary postfix operator ++ -- */
 207#define TC_UOPPRE1      (1 << 5)        /* unary prefix operator ++ -- $ */
 208#define TC_BINOPX       (1 << 6)        /* two-opnd operator */
 209#define TC_IN           (1 << 7)        /* 'in' */
 210#define TC_COMMA        (1 << 8)        /* , */
 211#define TC_PIPE         (1 << 9)        /* input redirection pipe | */
 212#define TC_UOPPRE2      (1 << 10)       /* unary prefix operator + - ! */
 213#define TC_ARRTERM      (1 << 11)       /* ] */
 214#define TC_LBRACE       (1 << 12)       /* { */
 215#define TC_RBRACE       (1 << 13)       /* } */
 216#define TC_SEMICOL      (1 << 14)       /* ; */
 217#define TC_NEWLINE      (1 << 15)
 218#define TC_STATX        (1 << 16)       /* ctl statement (for, next...) */
 219#define TC_WHILE        (1 << 17)       /* 'while' */
 220#define TC_ELSE         (1 << 18)       /* 'else' */
 221#define TC_BUILTIN      (1 << 19)
 222/* This costs ~50 bytes of code.
 223 * A separate class to support deprecated "length" form. If we don't need that
 224 * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
 225 * can be merged with TC_BUILTIN:
 226 */
 227#define TC_LENGTH       (1 << 20)       /* 'length' */
 228#define TC_GETLINE      (1 << 21)       /* 'getline' */
 229#define TC_FUNCDECL     (1 << 22)       /* 'function' 'func' */
 230#define TC_BEGIN        (1 << 23)       /* 'BEGIN' */
 231#define TC_END          (1 << 24)       /* 'END' */
 232#define TC_EOF          (1 << 25)
 233#define TC_VARIABLE     (1 << 26)       /* name */
 234#define TC_ARRAY        (1 << 27)       /* name[ */
 235#define TC_FUNCTION     (1 << 28)       /* name( */
 236#define TC_STRING       (1 << 29)       /* "..." */
 237#define TC_NUMBER       (1 << 30)
 238
 239#ifndef debug_parse_print_tc
 240static void debug_parse_print_tc(uint32_t n)
 241{
 242        if (n & TC_LPAREN  ) debug_printf_parse(" LPAREN"  );
 243        if (n & TC_RPAREN  ) debug_printf_parse(" RPAREN"  );
 244        if (n & TC_REGEXP  ) debug_printf_parse(" REGEXP"  );
 245        if (n & TC_OUTRDR  ) debug_printf_parse(" OUTRDR"  );
 246        if (n & TC_UOPPOST ) debug_printf_parse(" UOPPOST" );
 247        if (n & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" );
 248        if (n & TC_BINOPX  ) debug_printf_parse(" BINOPX"  );
 249        if (n & TC_IN      ) debug_printf_parse(" IN"      );
 250        if (n & TC_COMMA   ) debug_printf_parse(" COMMA"   );
 251        if (n & TC_PIPE    ) debug_printf_parse(" PIPE"    );
 252        if (n & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" );
 253        if (n & TC_ARRTERM ) debug_printf_parse(" ARRTERM" );
 254        if (n & TC_LBRACE  ) debug_printf_parse(" LBRACE"  );
 255        if (n & TC_RBRACE  ) debug_printf_parse(" RBRACE"  );
 256        if (n & TC_SEMICOL ) debug_printf_parse(" SEMICOL" );
 257        if (n & TC_NEWLINE ) debug_printf_parse(" NEWLINE" );
 258        if (n & TC_STATX   ) debug_printf_parse(" STATX"   );
 259        if (n & TC_WHILE   ) debug_printf_parse(" WHILE"   );
 260        if (n & TC_ELSE    ) debug_printf_parse(" ELSE"    );
 261        if (n & TC_BUILTIN ) debug_printf_parse(" BUILTIN" );
 262        if (n & TC_LENGTH  ) debug_printf_parse(" LENGTH"  );
 263        if (n & TC_GETLINE ) debug_printf_parse(" GETLINE" );
 264        if (n & TC_FUNCDECL) debug_printf_parse(" FUNCDECL");
 265        if (n & TC_BEGIN   ) debug_printf_parse(" BEGIN"   );
 266        if (n & TC_END     ) debug_printf_parse(" END"     );
 267        if (n & TC_EOF     ) debug_printf_parse(" EOF"     );
 268        if (n & TC_VARIABLE) debug_printf_parse(" VARIABLE");
 269        if (n & TC_ARRAY   ) debug_printf_parse(" ARRAY"   );
 270        if (n & TC_FUNCTION) debug_printf_parse(" FUNCTION");
 271        if (n & TC_STRING  ) debug_printf_parse(" STRING"  );
 272        if (n & TC_NUMBER  ) debug_printf_parse(" NUMBER"  );
 273}
 274#endif
 275
 276/* combined token classes ("token [class] sets") */
 277#define TS_UOPPRE   (TC_UOPPRE1 | TC_UOPPRE2)
 278
 279#define TS_BINOP    (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
 280//#define TS_UNARYOP (TS_UOPPRE | TC_UOPPOST)
 281#define TS_OPERAND  (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
 282                    | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
 283                    | TC_LPAREN | TC_STRING | TC_NUMBER)
 284
 285#define TS_LVALUE   (TC_VARIABLE | TC_ARRAY)
 286#define TS_STATEMNT (TC_STATX | TC_WHILE)
 287
 288/* word tokens, cannot mean something else if not expected */
 289#define TS_WORD     (TC_IN | TS_STATEMNT | TC_ELSE \
 290                    | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
 291                    | TC_FUNCDECL | TC_BEGIN | TC_END)
 292
 293/* discard newlines after these */
 294#define TS_NOTERM   (TS_BINOP | TC_COMMA | TC_LBRACE | TC_RBRACE \
 295                    | TC_SEMICOL | TC_NEWLINE)
 296
 297/* what can expression begin with */
 298#define TS_OPSEQ    (TS_OPERAND | TS_UOPPRE | TC_REGEXP)
 299/* what can group begin with */
 300#define TS_GRPSEQ   (TS_OPSEQ | TS_STATEMNT \
 301                    | TC_SEMICOL | TC_NEWLINE | TC_LBRACE)
 302
 303/* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */
 304/* operator is inserted between them */
 305#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_RPAREN \
 306                   | TC_STRING | TC_NUMBER | TC_UOPPOST \
 307                   | TC_LENGTH)
 308#define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE)
 309
 310#define OF_RES1     0x010000
 311#define OF_RES2     0x020000
 312#define OF_STR1     0x040000
 313#define OF_STR2     0x080000
 314#define OF_NUM1     0x100000
 315#define OF_CHECKED  0x200000
 316#define OF_REQUIRED 0x400000
 317
 318/* combined operator flags */
 319#define xx      0
 320#define xV      OF_RES2
 321#define xS      (OF_RES2 | OF_STR2)
 322#define Vx      OF_RES1
 323#define Rx      OF_REQUIRED
 324#define VV      (OF_RES1 | OF_RES2)
 325#define Nx      (OF_RES1 | OF_NUM1)
 326#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
 327#define Sx      (OF_RES1 | OF_STR1)
 328#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
 329#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
 330
 331#define OPCLSMASK 0xFF00
 332#define OPNMASK   0x007F
 333
 334/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 335 * (for builtins it has different meaning)
 336 */
 337#undef P
 338#undef PRIMASK
 339#undef PRIMASK2
 340#define P(x)      (x << 24)
 341#define PRIMASK   0x7F000000
 342#define PRIMASK2  0x7E000000
 343
 344/* Operation classes */
 345#define SHIFT_TIL_THIS  0x0600
 346#define RECUR_FROM_THIS 0x1000
 347enum {
 348        OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
 349        OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
 350
 351        OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
 352        OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
 353        OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
 354
 355        OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
 356        OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
 357        OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
 358        OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
 359        OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
 360        OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
 361        OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
 362        OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
 363        OC_DONE = 0x2800,
 364
 365        ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
 366        ST_WHILE = 0x3300
 367};
 368
 369/* simple builtins */
 370enum {
 371        F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
 372        F_ti,   F_le,   F_sy,   F_ff,   F_cl
 373};
 374
 375/* builtins */
 376enum {
 377        B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
 378        B_ge,   B_gs,   B_su,
 379        B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
 380};
 381
 382/* tokens and their corresponding info values */
 383
 384#define NTC     "\377"  /* switch to next token class (tc<<1) */
 385#define NTCC    '\377'
 386
 387static const char tokenlist[] ALIGN1 =
 388        "\1("         NTC                                   /* TC_LPAREN */
 389        "\1)"         NTC                                   /* TC_RPAREN */
 390        "\1/"         NTC                                   /* TC_REGEXP */
 391        "\2>>"        "\1>"         "\1|"       NTC         /* TC_OUTRDR */
 392        "\2++"        "\2--"        NTC                     /* TC_UOPPOST */
 393        "\2++"        "\2--"        "\1$"       NTC         /* TC_UOPPRE1 */
 394        "\2=="        "\1="         "\2+="      "\2-="      /* TC_BINOPX */
 395        "\2*="        "\2/="        "\2%="      "\2^="
 396        "\1+"         "\1-"         "\3**="     "\2**"
 397        "\1/"         "\1%"         "\1^"       "\1*"
 398        "\2!="        "\2>="        "\2<="      "\1>"
 399        "\1<"         "\2!~"        "\1~"       "\2&&"
 400        "\2||"        "\1?"         "\1:"       NTC
 401        "\2in"        NTC                                   /* TC_IN */
 402        "\1,"         NTC                                   /* TC_COMMA */
 403        "\1|"         NTC                                   /* TC_PIPE */
 404        "\1+"         "\1-"         "\1!"       NTC         /* TC_UOPPRE2 */
 405        "\1]"         NTC                                   /* TC_ARRTERM */
 406        "\1{"         NTC                                   /* TC_LBRACE */
 407        "\1}"         NTC                                   /* TC_RBRACE */
 408        "\1;"         NTC                                   /* TC_SEMICOL */
 409        "\1\n"        NTC                                   /* TC_NEWLINE */
 410        "\2if"        "\2do"        "\3for"     "\5break"   /* TC_STATX */
 411        "\10continue" "\6delete"    "\5print"
 412        "\6printf"    "\4next"      "\10nextfile"
 413        "\6return"    "\4exit"      NTC
 414        "\5while"     NTC                                   /* TC_WHILE */
 415        "\4else"      NTC                                   /* TC_ELSE */
 416        "\3and"       "\5compl"     "\6lshift"  "\2or"      /* TC_BUILTIN */
 417        "\6rshift"    "\3xor"
 418        "\5close"     "\6system"    "\6fflush"  "\5atan2"
 419        "\3cos"       "\3exp"       "\3int"     "\3log"
 420        "\4rand"      "\3sin"       "\4sqrt"    "\5srand"
 421        "\6gensub"    "\4gsub"      "\5index"   /* "\6length" was here */
 422        "\5match"     "\5split"     "\7sprintf" "\3sub"
 423        "\6substr"    "\7systime"   "\10strftime" "\6mktime"
 424        "\7tolower"   "\7toupper"   NTC
 425        "\6length"    NTC                                   /* TC_LENGTH */
 426        "\7getline"   NTC                                   /* TC_GETLINE */
 427        "\4func"      "\10function" NTC                     /* TC_FUNCDECL */
 428        "\5BEGIN"     NTC                                   /* TC_BEGIN */
 429        "\3END"                                             /* TC_END */
 430        /* compiler adds trailing "\0" */
 431        ;
 432
 433static const uint32_t tokeninfo[] ALIGN4 = {
 434        0,
 435        0,
 436#define TI_REGEXP OC_REGEXP
 437        TI_REGEXP,
 438        xS|'a',                  xS|'w',                  xS|'|',
 439        OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
 440#define TI_PREINC (OC_UNARY|xV|P(9)|'P')
 441#define TI_PREDEC (OC_UNARY|xV|P(9)|'M')
 442        TI_PREINC,               TI_PREDEC,               OC_FIELD|xV|P(5),
 443        OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
 444        OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
 445        OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
 446        OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
 447        OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
 448#define TI_LESS     (OC_COMPARE|VV|P(39)|2)
 449        TI_LESS,                 OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
 450#define TI_TERNARY  (OC_TERNARY|Vx|P(64)|'?')
 451#define TI_COLON    (OC_COLON|xx|P(67)|':')
 452        OC_LOR|Vx|P(59),         TI_TERNARY,              TI_COLON,
 453#define TI_IN       (OC_IN|SV|P(49))
 454        TI_IN,
 455#define TI_COMMA    (OC_COMMA|SS|P(80))
 456        TI_COMMA,
 457#define TI_PGETLINE (OC_PGETLINE|SV|P(37))
 458        TI_PGETLINE,
 459        OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
 460        0, /* ] */
 461        0,
 462        0,
 463        0,
 464        0, /* \n */
 465        ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
 466        OC_CONTINUE,  OC_DELETE|Rx, OC_PRINT,
 467        OC_PRINTF,    OC_NEXT,      OC_NEXTFILE,
 468        OC_RETURN|Vx, OC_EXIT|Nx,
 469        ST_WHILE,
 470        0, /* else */
 471// OC_B's are builtins with enforced minimum number of arguments (two upper bits).
 472//  Highest byte bit pattern: nn s3s2s1 v3v2v1
 473//  nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var
 474// OC_F's are builtins with zero or one argument.
 475//  |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt
 476//  Check for no args is present in builtins' code (not in this table): rand, systime
 477//  Have one _optional_ arg: fflush, srand, length
 478#define OC_B   OC_BUILTIN
 479#define OC_F   OC_FBLTIN
 480#define A1     P(0x40) /*one arg*/
 481#define A2     P(0x80) /*two args*/
 482#define A3     P(0xc0) /*three args*/
 483#define __v    P(1)
 484#define _vv    P(3)
 485#define __s__v P(9)
 486#define __s_vv P(0x0b)
 487#define __svvv P(0x0f)
 488#define _ss_vv P(0x1b)
 489#define _s_vv_ P(0x16)
 490#define ss_vv_ P(0x36)
 491        OC_B|B_an|_vv|A2,   OC_B|B_co|__v|A1,   OC_B|B_ls|_vv|A2,   OC_B|B_or|_vv|A2,   // and    compl   lshift   or
 492        OC_B|B_rs|_vv|A2,   OC_B|B_xo|_vv|A2,                                           // rshift xor
 493        OC_F|F_cl|Sx|Rx,    OC_F|F_sy|Sx|Rx,    OC_F|F_ff|Sx,       OC_B|B_a2|_vv|A2,   // close  system  fflush   atan2
 494        OC_F|F_co|Nx|Rx,    OC_F|F_ex|Nx|Rx,    OC_F|F_in|Nx|Rx,    OC_F|F_lg|Nx|Rx,    // cos    exp     int      log
 495        OC_F|F_rn,          OC_F|F_si|Nx|Rx,    OC_F|F_sq|Nx|Rx,    OC_F|F_sr|Nx,       // rand   sin     sqrt     srand
 496        OC_B|B_ge|_s_vv_|A3,OC_B|B_gs|ss_vv_|A2,OC_B|B_ix|_ss_vv|A2,                    // gensub gsub    index  /*length was here*/
 497        OC_B|B_ma|__s__v|A2,OC_B|B_sp|__s_vv|A2,OC_SPRINTF,         OC_B|B_su|ss_vv_|A2,// match  split   sprintf  sub
 498        OC_B|B_ss|__svvv|A2,OC_F|F_ti,          OC_B|B_ti|__s_vv,   OC_B|B_mt|__s_vv,   // substr systime strftime mktime
 499        OC_B|B_lo|__s__v|A1,OC_B|B_up|__s__v|A1,                                        // tolower toupper
 500        OC_F|F_le|Sx,   // length
 501        OC_GETLINE|SV,  // getline
 502        0, 0, // func function
 503        0, // BEGIN
 504        0  // END
 505#undef A1
 506#undef A2
 507#undef A3
 508#undef OC_B
 509#undef OC_F
 510};
 511
 512/* internal variable names and their initial values       */
 513/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
 514enum {
 515        CONVFMT,    OFMT,       FS,         OFS,
 516        ORS,        RS,         RT,         FILENAME,
 517        SUBSEP,     F0,         ARGIND,     ARGC,
 518        ARGV,       ERRNO,      FNR,        NR,
 519        NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
 520};
 521
 522static const char vNames[] ALIGN1 =
 523        "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
 524        "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
 525        "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
 526        "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
 527        "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
 528
 529static const char vValues[] ALIGN1 =
 530        "%.6g\0"    "%.6g\0"    " \0"       " \0"
 531        "\n\0"      "\n\0"      "\0"        "\0"
 532        "\034\0"    "\0"        "\377";
 533
 534/* hash size may grow to these values */
 535#define FIRST_PRIME 61
 536static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
 537
 538
 539/* Globals. Split in two parts so that first one is addressed
 540 * with (mostly short) negative offsets.
 541 * NB: it's unsafe to put members of type "double"
 542 * into globals2 (gcc may fail to align them).
 543 */
 544struct globals {
 545        double t_double;
 546        chain beginseq, mainseq, endseq;
 547        chain *seq;
 548        node *break_ptr, *continue_ptr;
 549        rstream *iF;
 550        xhash *ahash;  /* argument names, used only while parsing function bodies */
 551        xhash *fnhash; /* function names, used only in parsing stage */
 552        xhash *vhash;  /* variables and arrays */
 553        //xhash *fdhash; /* file objects, used only in execution stage */
 554        //we are reusing ahash as fdhash, via define (see later)
 555        const char *g_progname;
 556        int g_lineno;
 557        int nfields;
 558        int maxfields; /* used in fsrealloc() only */
 559        var *Fields;
 560        char *g_pos;
 561        char g_saved_ch;
 562        smallint icase;
 563        smallint exiting;
 564        smallint nextrec;
 565        smallint nextfile;
 566        smallint is_f0_split;
 567        smallint t_rollback;
 568
 569        /* former statics from various functions */
 570        smallint next_token__concat_inserted;
 571        uint32_t next_token__save_tclass;
 572        uint32_t next_token__save_info;
 573};
 574struct globals2 {
 575        uint32_t t_info; /* often used */
 576        uint32_t t_tclass;
 577        char *t_string;
 578        int t_lineno;
 579
 580        var *intvar[NUM_INTERNAL_VARS]; /* often used */
 581
 582        /* former statics from various functions */
 583        char *split_f0__fstrings;
 584
 585        rstream next_input_file__rsm;
 586        smallint next_input_file__files_happen;
 587
 588        smalluint exitcode;
 589
 590        unsigned evaluate__seed;
 591        var *evaluate__fnargs;
 592        regex_t evaluate__sreg;
 593
 594        var ptest__tmpvar;
 595        var awk_printf__tmpvar;
 596        var as_regex__tmpvar;
 597        var exit__tmpvar;
 598        var main__tmpvar;
 599
 600        tsplitter exec_builtin__tspl;
 601
 602        /* biggest and least used members go last */
 603        tsplitter fsplitter, rsplitter;
 604
 605        char g_buf[MAXVARFMT + 1];
 606};
 607#define G1 (ptr_to_globals[-1])
 608#define G (*(struct globals2 *)ptr_to_globals)
 609/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
 610//char G1size[sizeof(G1)]; // 0x70
 611//char Gsize[sizeof(G)]; // 0x2f8
 612/* Trying to keep most of members accessible with short offsets: */
 613//char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; // 0x7c
 614#define t_double     (G1.t_double    )
 615#define beginseq     (G1.beginseq    )
 616#define mainseq      (G1.mainseq     )
 617#define endseq       (G1.endseq      )
 618#define seq          (G1.seq         )
 619#define break_ptr    (G1.break_ptr   )
 620#define continue_ptr (G1.continue_ptr)
 621#define iF           (G1.iF          )
 622#define ahash        (G1.ahash       )
 623#define fnhash       (G1.fnhash      )
 624#define vhash        (G1.vhash       )
 625#define fdhash       ahash
 626//^^^^^^^^^^^^^^^^^^ ahash is cleared after every function parsing,
 627// and ends up empty after parsing phase. Thus, we can simply reuse it
 628// for fdhash in execution stage.
 629#define g_progname   (G1.g_progname  )
 630#define g_lineno     (G1.g_lineno    )
 631#define nfields      (G1.nfields     )
 632#define maxfields    (G1.maxfields   )
 633#define Fields       (G1.Fields      )
 634#define g_pos        (G1.g_pos       )
 635#define g_saved_ch   (G1.g_saved_ch  )
 636#define icase        (G1.icase       )
 637#define exiting      (G1.exiting     )
 638#define nextrec      (G1.nextrec     )
 639#define nextfile     (G1.nextfile    )
 640#define is_f0_split  (G1.is_f0_split )
 641#define t_rollback   (G1.t_rollback  )
 642#define t_info       (G.t_info      )
 643#define t_tclass     (G.t_tclass    )
 644#define t_string     (G.t_string    )
 645#define t_lineno     (G.t_lineno    )
 646#define intvar       (G.intvar      )
 647#define fsplitter    (G.fsplitter   )
 648#define rsplitter    (G.rsplitter   )
 649#define g_buf        (G.g_buf       )
 650#define INIT_G() do { \
 651        SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
 652        t_tclass = TC_NEWLINE; \
 653        G.evaluate__seed = 1; \
 654} while (0)
 655
 656static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
 657static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
 658static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
 659static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
 660static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments";
 661static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
 662static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
 663static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
 664static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
 665static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field";
 666
 667static int awk_exit(void) NORETURN;
 668
 669static void syntax_error(const char *message) NORETURN;
 670static void syntax_error(const char *message)
 671{
 672        bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
 673}
 674
 675/* ---- hash stuff ---- */
 676
 677static unsigned hashidx(const char *name)
 678{
 679        unsigned idx = 0;
 680
 681        while (*name)
 682                idx = *name++ + (idx << 6) - idx;
 683        return idx;
 684}
 685
 686/* create new hash */
 687static xhash *hash_init(void)
 688{
 689        xhash *newhash;
 690
 691        newhash = xzalloc(sizeof(*newhash));
 692        newhash->csize = FIRST_PRIME;
 693        newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
 694
 695        return newhash;
 696}
 697
 698static void hash_clear(xhash *hash)
 699{
 700        unsigned i;
 701        hash_item *hi, *thi;
 702
 703        for (i = 0; i < hash->csize; i++) {
 704                hi = hash->items[i];
 705                while (hi) {
 706                        thi = hi;
 707                        hi = hi->next;
 708//FIXME: this assumes that it's a hash of *variables*:
 709                        free(thi->data.v.string);
 710                        free(thi);
 711                }
 712                hash->items[i] = NULL;
 713        }
 714        hash->glen = hash->nel = 0;
 715}
 716
 717#if 0 //UNUSED
 718static void hash_free(xhash *hash)
 719{
 720        hash_clear(hash);
 721        free(hash->items);
 722        free(hash);
 723}
 724#endif
 725
 726/* find item in hash, return ptr to data, NULL if not found */
 727static NOINLINE void *hash_search3(xhash *hash, const char *name, unsigned idx)
 728{
 729        hash_item *hi;
 730
 731        hi = hash->items[idx % hash->csize];
 732        while (hi) {
 733                if (strcmp(hi->name, name) == 0)
 734                        return &hi->data;
 735                hi = hi->next;
 736        }
 737        return NULL;
 738}
 739
 740static void *hash_search(xhash *hash, const char *name)
 741{
 742        return hash_search3(hash, name, hashidx(name));
 743}
 744
 745/* grow hash if it becomes too big */
 746static void hash_rebuild(xhash *hash)
 747{
 748        unsigned newsize, i, idx;
 749        hash_item **newitems, *hi, *thi;
 750
 751        if (hash->nprime == ARRAY_SIZE(PRIMES))
 752                return;
 753
 754        newsize = PRIMES[hash->nprime++];
 755        newitems = xzalloc(newsize * sizeof(newitems[0]));
 756
 757        for (i = 0; i < hash->csize; i++) {
 758                hi = hash->items[i];
 759                while (hi) {
 760                        thi = hi;
 761                        hi = thi->next;
 762                        idx = hashidx(thi->name) % newsize;
 763                        thi->next = newitems[idx];
 764                        newitems[idx] = thi;
 765                }
 766        }
 767
 768        free(hash->items);
 769        hash->csize = newsize;
 770        hash->items = newitems;
 771}
 772
 773/* find item in hash, add it if necessary. Return ptr to data */
 774static void *hash_find(xhash *hash, const char *name)
 775{
 776        hash_item *hi;
 777        unsigned idx;
 778        int l;
 779
 780        idx = hashidx(name);
 781        hi = hash_search3(hash, name, idx);
 782        if (!hi) {
 783                if (++hash->nel > hash->csize * 8)
 784                        hash_rebuild(hash);
 785
 786                l = strlen(name) + 1;
 787                hi = xzalloc(sizeof(*hi) + l);
 788                strcpy(hi->name, name);
 789
 790                idx = idx % hash->csize;
 791                hi->next = hash->items[idx];
 792                hash->items[idx] = hi;
 793                hash->glen += l;
 794        }
 795        return &hi->data;
 796}
 797
/* Typed wrappers around hash_find(): look NAME up and create the entry
 * if it is missing. Each one merely casts hash_find()'s result to the
 * payload type. findvar takes the hash explicitly; newvar, newfile and
 * newfunc use vhash, fdhash and fnhash respectively.
 */
#define findvar(hash, name) ((var*)    hash_find((hash), (name)))
#define newvar(name)        ((var*)    hash_find(vhash, (name)))
#define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
#define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
 802
 803static void hash_remove(xhash *hash, const char *name)
 804{
 805        hash_item *hi, **phi;
 806
 807        phi = &hash->items[hashidx(name) % hash->csize];
 808        while (*phi) {
 809                hi = *phi;
 810                if (strcmp(hi->name, name) == 0) {
 811                        hash->glen -= (strlen(name) + 1);
 812                        hash->nel--;
 813                        *phi = hi->next;
 814                        free(hi);
 815                        break;
 816                }
 817                phi = &hi->next;
 818        }
 819}
 820
 821/* ------ some useful functions ------ */
 822
 823static char *skip_spaces(char *p)
 824{
 825        for (;;) {
 826                if (*p == '\\' && p[1] == '\n') {
 827                        p++;
 828                        t_lineno++;
 829                } else if (*p != ' ' && *p != '\t') {
 830                        break;
 831                }
 832                p++;
 833        }
 834        return p;
 835}
 836
 837/* returns old *s, advances *s past word and terminating NUL */
 838static char *nextword(char **s)
 839{
 840        char *p = *s;
 841        char *q = p;
 842        while (*q++ != '\0')
 843                continue;
 844        *s = q;
 845        return p;
 846}
 847
 848static char nextchar(char **s)
 849{
 850        char c, *pps;
 851
 852        c = *(*s)++;
 853        pps = *s;
 854        if (c == '\\')
 855                c = bb_process_escape_sequence((const char**)s);
 856        /* Example awk statement:
 857         * s = "abc\"def"
 858         * we must treat \" as "
 859         */
 860        if (c == '\\' && *s == pps) { /* unrecognized \z? */
 861                c = *(*s); /* yes, fetch z */
 862                if (c)
 863                        (*s)++; /* advance unless z = NUL */
 864        }
 865        return c;
 866}
 867
 868/* TODO: merge with strcpy_and_process_escape_sequences()?
 869 */
 870static void unescape_string_in_place(char *s1)
 871{
 872        char *s = s1;
 873        while ((*s1 = nextchar(&s)) != '\0')
 874                s1++;
 875}
 876
 877static ALWAYS_INLINE int isalnum_(int c)
 878{
 879        return (isalnum(c) || c == '_');
 880}
 881
 882static double my_strtod(char **pp)
 883{
 884        char *cp = *pp;
 885        if (ENABLE_DESKTOP && cp[0] == '0') {
 886                /* Might be hex or octal integer: 0x123abc or 07777 */
 887                char c = (cp[1] | 0x20);
 888                if (c == 'x' || isdigit(cp[1])) {
 889                        unsigned long long ull = strtoull(cp, pp, 0);
 890                        if (c == 'x')
 891                                return ull;
 892                        c = **pp;
 893                        if (!isdigit(c) && c != '.')
 894                                return ull;
 895                        /* else: it may be a floating number. Examples:
 896                         * 009.123 (*pp points to '9')
 897                         * 000.123 (*pp points to '.')
 898                         * fall through to strtod.
 899                         */
 900                }
 901        }
 902        return strtod(cp, pp);
 903}
 904
 905/* -------- working with variables (set/get/copy/etc) -------- */
 906
 907static void fmt_num(const char *format, double n)
 908{
 909        if (n == (long long)n) {
 910                snprintf(g_buf, MAXVARFMT, "%lld", (long long)n);
 911        } else {
 912                const char *s = format;
 913                char c;
 914
 915                do { c = *s; } while (c && *++s);
 916                if (strchr("diouxX", c)) {
 917                        snprintf(g_buf, MAXVARFMT, format, (int)n);
 918                } else if (strchr("eEfFgGaA", c)) {
 919                        snprintf(g_buf, MAXVARFMT, format, n);
 920                } else {
 921                        syntax_error(EMSG_INV_FMT);
 922                }
 923        }
 924}
 925
 926static xhash *iamarray(var *a)
 927{
 928        while (a->type & VF_CHILD)
 929                a = a->x.parent;
 930
 931        if (!(a->type & VF_ARRAY)) {
 932                a->type |= VF_ARRAY;
 933                a->x.array = hash_init();
 934        }
 935        return a->x.array;
 936}
 937
 938#define clear_array(array) hash_clear(array)
 939
 940/* clear a variable */
 941static var *clrvar(var *v)
 942{
 943        if (!(v->type & VF_FSTR))
 944                free(v->string);
 945
 946        v->type &= VF_DONTTOUCH;
 947        v->type |= VF_DIRTY;
 948        v->string = NULL;
 949        return v;
 950}
 951
 952static void handle_special(var *);
 953
 954/* assign string value to variable */
 955static var *setvar_p(var *v, char *value)
 956{
 957        clrvar(v);
 958        v->string = value;
 959        handle_special(v);
 960        return v;
 961}
 962
 963/* same as setvar_p but make a copy of string */
 964static var *setvar_s(var *v, const char *value)
 965{
 966        return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
 967}
 968
 969/* same as setvar_s but sets USER flag */
 970static var *setvar_u(var *v, const char *value)
 971{
 972        v = setvar_s(v, value);
 973        v->type |= VF_USER;
 974        return v;
 975}
 976
 977/* set array element to user string */
 978static void setari_u(var *a, int idx, const char *s)
 979{
 980        var *v;
 981
 982        v = findvar(iamarray(a), itoa(idx));
 983        setvar_u(v, s);
 984}
 985
 986/* assign numeric value to variable */
 987static var *setvar_i(var *v, double value)
 988{
 989        clrvar(v);
 990        v->type |= VF_NUMBER;
 991        v->number = value;
 992        handle_special(v);
 993        return v;
 994}
 995
 996static const char *getvar_s(var *v)
 997{
 998        /* if v is numeric and has no cached string, convert it to string */
 999        if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
1000                fmt_num(getvar_s(intvar[CONVFMT]), v->number);
1001                v->string = xstrdup(g_buf);
1002                v->type |= VF_CACHED;
1003        }
1004        return (v->string == NULL) ? "" : v->string;
1005}
1006
1007static double getvar_i(var *v)
1008{
1009        char *s;
1010
1011        if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
1012                v->number = 0;
1013                s = v->string;
1014                if (s && *s) {
1015                        debug_printf_eval("getvar_i: '%s'->", s);
1016                        v->number = my_strtod(&s);
1017                        debug_printf_eval("%f (s:'%s')\n", v->number, s);
1018                        if (v->type & VF_USER) {
1019//TODO: skip_spaces() also skips backslash+newline, is it intended here?
1020                                s = skip_spaces(s);
1021                                if (*s != '\0')
1022                                        v->type &= ~VF_USER;
1023                        }
1024                } else {
1025                        debug_printf_eval("getvar_i: '%s'->zero\n", s);
1026                        v->type &= ~VF_USER;
1027                }
1028                v->type |= VF_CACHED;
1029        }
1030        debug_printf_eval("getvar_i: %f\n", v->number);
1031        return v->number;
1032}
1033
1034/* Used for operands of bitwise ops */
1035static unsigned long getvar_i_int(var *v)
1036{
1037        double d = getvar_i(v);
1038
1039        /* Casting doubles to longs is undefined for values outside
1040         * of target type range. Try to widen it as much as possible */
1041        if (d >= 0)
1042                return (unsigned long)d;
1043        /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
1044        return - (long) (unsigned long) (-d);
1045}
1046
1047static var *copyvar(var *dest, const var *src)
1048{
1049        if (dest != src) {
1050                clrvar(dest);
1051                dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
1052                debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
1053                dest->number = src->number;
1054                if (src->string)
1055                        dest->string = xstrdup(src->string);
1056        }
1057        handle_special(dest);
1058        return dest;
1059}
1060
1061static var *incvar(var *v)
1062{
1063        return setvar_i(v, getvar_i(v) + 1.0);
1064}
1065
1066/* return true if v is number or numeric string */
1067static int is_numeric(var *v)
1068{
1069        getvar_i(v);
1070        return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
1071}
1072
1073/* return 1 when value of v corresponds to true, 0 otherwise */
1074static int istrue(var *v)
1075{
1076        if (is_numeric(v))
1077                return (v->number != 0);
1078        return (v->string && v->string[0]);
1079}
1080
1081/* ------- awk program text parsing ------- */
1082
/* Parse next token pointed by global pos, place results into global t_XYZ variables.
 * If token isn't expected, print error message and die.
 * Return token class (also store it in t_tclass).
 *
 * 'expected' is a bitmask of acceptable token classes. Besides the final
 * check, it also steers scanning: '/' starts a regexp only if TC_REGEXP
 * is expected, and "NAME [" is glued into an array reference only if
 * TC_ARRAY is expected.
 * A token pushed back with rollback_token(), or one held back behind an
 * inserted concatenation operator, is returned without any scanning.
 * Scanning modifies the program text: string and regexp tokens are
 * unescaped in place, and names are NUL-terminated where they stand.
 */
static uint32_t next_token(uint32_t expected)
{
#define concat_inserted (G1.next_token__concat_inserted)
#define save_tclass     (G1.next_token__save_tclass)
#define save_info       (G1.next_token__save_info)

        char *p;
        const char *tl;
        const uint32_t *ti;
        uint32_t tc, last_token_class;

        last_token_class = t_tclass; /* t_tclass is initialized to TC_NEWLINE */

        debug_printf_parse("%s() expected(%x):", __func__, expected);
        debug_parse_print_tc(expected);
        debug_printf_parse("\n");

        if (t_rollback) {
                /* previous token was pushed back: t_tclass/t_info are still valid */
                debug_printf_parse("%s: using rolled-back token\n", __func__);
                t_rollback = FALSE;
        } else if (concat_inserted) {
                /* previous call returned a synthetic concatenation operator;
                 * now hand out the real token which was saved behind it */
                debug_printf_parse("%s: using concat-inserted token\n", __func__);
                concat_inserted = FALSE;
                t_tclass = save_tclass;
                t_info = save_info;
        } else {
                p = g_pos;
                if (g_saved_ch != '\0') {
                        /* undo the NUL which terminated the previous variable name */
                        *p = g_saved_ch;
                        g_saved_ch = '\0';
                }
 readnext:
                p = skip_spaces(p);
                g_lineno = t_lineno;
                /* skip comment up to (not including) end of line */
                if (*p == '#')
                        while (*p != '\n' && *p != '\0')
                                p++;

                if (*p == '\0') {
                        tc = TC_EOF;
                        debug_printf_parse("%s: token found: TC_EOF\n", __func__);
                } else if (*p == '\"') {
                        /* it's a string */
                        /* unescaped in place: s (write ptr) never gets ahead of p (read ptr) */
                        char *s = t_string = ++p;
                        while (*p != '\"') {
                                char *pp;
                                if (*p == '\0' || *p == '\n')
                                        syntax_error(EMSG_UNEXP_EOS);
                                pp = p;
                                *s++ = nextchar(&pp);
                                p = pp;
                        }
                        p++;
                        *s = '\0';
                        tc = TC_STRING;
                        debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
                } else if ((expected & TC_REGEXP) && *p == '/') {
                        /* it's regexp */
                        /* copied in place up to the closing '/', with backslash
                         * sequences run through bb_process_escape_sequence() */
                        char *s = t_string = ++p;
                        while (*p != '/') {
                                if (*p == '\0' || *p == '\n')
                                        syntax_error(EMSG_UNEXP_EOS);
                                *s = *p++;
                                if (*s++ == '\\') {
                                        char *pp = p;
                                        s[-1] = bb_process_escape_sequence((const char **)&pp);
                                        if (*p == '\\')
                                                *s++ = '\\';
                                        if (pp == p)
                                                *s++ = *p++;
                                        else
                                                p = pp;
                                }
                        }
                        p++;
                        *s = '\0';
                        tc = TC_REGEXP;
                        debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);

                } else if (*p == '.' || isdigit(*p)) {
                        /* it's a number */
                        char *pp = p;
                        t_double = my_strtod(&pp);
                        p = pp;
                        /* a '.' right after the number (e.g. "1.2.3") is an error */
                        if (*p == '.')
                                syntax_error(EMSG_UNEXP_TOKEN);
                        tc = TC_NUMBER;
                        debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
                } else {
                        char *end_of_name;

                        if (*p == '\n')
                                t_lineno++;

                        /* search for something known */
                        /* tokenlist is a sequence of length-prefixed token texts;
                         * an NTCC length byte switches to the next token class bit.
                         * tokeninfo has one entry per token text.
                         */
                        tl = tokenlist;
                        tc = 0x00000001;
                        ti = tokeninfo;
                        while (*tl) {
                                int l = (unsigned char) *tl++;
                                if (l == (unsigned char) NTCC) {
                                        tc <<= 1;
                                        continue;
                                }
                                /* if token class is expected,
                                 * token matches,
                                 * and it's not a longer word,
                                 */
                                if ((tc & (expected | TS_WORD | TC_NEWLINE))
                                 && strncmp(p, tl, l) == 0
                                 && !((tc & TS_WORD) && isalnum_(p[l]))
                                ) {
                                        /* then this is what we are looking for */
                                        t_info = *ti;
                                        debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
                                        p += l;
                                        goto token_found;
                                }
                                ti++;
                                tl += l;
                        }
                        /* not a known token */

                        /* is it a name? (var/array/function) */
                        if (!isalnum_(*p))
                                syntax_error(EMSG_UNEXP_TOKEN); /* no */
                        /* yes */
                        t_string = p;
                        while (isalnum_(*p))
                                p++;
                        end_of_name = p;

                        if (last_token_class == TC_FUNCDECL)
                                /* eat space in "function FUNC (...) {...}" declaration */
                                p = skip_spaces(p);
                        else if (expected & TC_ARRAY) {
                                /* eat space between array name and [ */
                                char *s = skip_spaces(p);
                                if (*s == '[') /* array ref, not just a name? */
                                        p = s;
                        }
                        /* else: do NOT consume whitespace after variable name!
                         * gawk allows definition "function FUNC (p) {...}" - note space,
                         * but disallows the call "FUNC (p)" because it isn't one -
                         * expression "v (a)" should NOT be parsed as TC_FUNCTION:
                         * it is a valid concatenation if "v" is a variable,
                         * not a function name (and type of name is not known at parse time).
                         */

                        if (*p == '(') {
                                p++;
                                tc = TC_FUNCTION;
                                debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
                        } else if (*p == '[') {
                                p++;
                                tc = TC_ARRAY;
                                debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
                        } else {
                                tc = TC_VARIABLE;
                                debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
                                if (end_of_name == p) {
                                        /* there is no space for trailing NUL in t_string!
                                         * We need to save the char we are going to NUL.
                                         * (we'll use it in future call to next_token())
                                         */
                                        g_saved_ch = *end_of_name;
// especially pathological example is V="abc"; V.2 - it's V concatenated to .2
// (it evaluates to "abc0.2"). Because of this case, we can't simply cache
// '.' and analyze it later: we also have to *store it back* in next
// next_token(), in order to give my_strtod() the undamaged ".2" string.
                                }
                        }
                        *end_of_name = '\0'; /* terminate t_string */
                }
 token_found:
                g_pos = p;

                /* skipping newlines in some cases */
                if ((last_token_class & TS_NOTERM) && (tc & TC_NEWLINE))
                        goto readnext;

                /* insert concatenation operator when needed */
                debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__,
                        (last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP),
                        !(last_token_class == TC_LENGTH && tc == TC_LPAREN));
                if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP)
                 && !(last_token_class == TC_LENGTH && tc == TC_LPAREN) /* but not for "length(..." */
                ) {
                        /* return a synthetic concat operator now; the token
                         * just scanned is delivered by the next call */
                        concat_inserted = TRUE;
                        save_tclass = tc;
                        save_info = t_info;
                        tc = TC_BINOPX;
                        t_info = OC_CONCAT | SS | P(35);
                }

                t_tclass = tc;
                debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, tc);
        }
        /* Are we ready for this? */
        if (!(t_tclass & expected)) {
                syntax_error((last_token_class & (TC_NEWLINE | TC_EOF)) ?
                                EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
        }

        debug_printf_parse("%s: returning, t_double:%f t_tclass:", __func__, t_double);
        debug_parse_print_tc(t_tclass);
        debug_printf_parse("\n");

        return t_tclass;
#undef concat_inserted
#undef save_tclass
#undef save_info
}
1300
/* Push the current token back: the next next_token() call sees t_rollback
 * set and reuses the existing t_tclass/t_info instead of scanning further.
 */
static ALWAYS_INLINE void rollback_token(void)
{
        t_rollback = TRUE;
}
1305
1306static node *new_node(uint32_t info)
1307{
1308        node *n;
1309
1310        n = xzalloc(sizeof(node));
1311        n->info = info;
1312        n->lineno = g_lineno;
1313        return n;
1314}
1315
1316static void mk_re_node(const char *s, node *n, regex_t *re)
1317{
1318        n->info = TI_REGEXP;
1319        n->l.re = re;
1320        n->r.ire = re + 1;
1321        xregcomp(re, s, REG_EXTENDED);
1322        xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1323}
1324
1325static node *parse_expr(uint32_t);
1326
1327static node *parse_lrparen_list(void)
1328{
1329        next_token(TC_LPAREN);
1330        return parse_expr(TC_RPAREN);
1331}
1332
/* parse expression terminated by given argument, return ptr
 * to built subtree. Terminator is eaten by parse_expr.
 *
 * term_tc is a bitmask of token classes which end the expression.
 * Returns NULL if the terminator is seen before any operand.
 *
 * The tree is grown under a dummy root 'sn' which lives on the stack
 * (the result is sn.r.n). 'cn' is the most recently added node, and each
 * node's a.n field is used as the link back towards the root.
 * 'expected_tc' is recomputed after every token and tells next_token()
 * what may legally follow.
 */
static node *parse_expr(uint32_t term_tc)
{
        node sn;
        node *cn = &sn;
        node *vn, *glptr;
        uint32_t tc, expected_tc;
        var *v;

        debug_printf_parse("%s() term_tc(%x):", __func__, term_tc);
        debug_parse_print_tc(term_tc);
        debug_printf_parse("\n");

        sn.info = PRIMASK;
        sn.r.n = sn.a.n = glptr = NULL;
        expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc;

        while (!((tc = next_token(expected_tc)) & term_tc)) {

                /* glptr is non-NULL only right after a getline token (set below) */
                if (glptr && (t_info == TI_LESS)) {
                        /* input redirection (<) attached to glptr node */
                        debug_printf_parse("%s: input redir\n", __func__);
                        cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
                        cn->a.n = glptr;
                        expected_tc = TS_OPERAND | TS_UOPPRE;
                        glptr = NULL;
                        continue;
                }
                if (tc & (TS_BINOP | TC_UOPPOST)) {
                        debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
                        /* for binary and postfix-unary operators, jump back over
                         * previous operators with higher priority */
                        vn = cn;
                        while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
                            || ((t_info == vn->info) && t_info == TI_COLON)
                        ) {
                                vn = vn->a.n;
                                if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
                        }
                        if (t_info == TI_TERNARY)
//TODO: why?
                                t_info += P(6);
                        /* splice the new operator node in between vn and vn's parent */
                        cn = vn->a.n->r.n = new_node(t_info);
                        cn->a.n = vn->a.n;
                        if (tc & TS_BINOP) {
                                /* binary: what was parsed so far becomes the left operand */
                                cn->l.n = vn;
//FIXME: this is the place to detect and reject assignments to non-lvalues.
//Currently we allow "assignments" to consts and temporaries, nonsense like this:
// awk 'BEGIN { "qwe" = 1 }'
// awk 'BEGIN { 7 *= 7 }'
// awk 'BEGIN { length("qwe") = 1 }'
// awk 'BEGIN { (1+1) += 3 }'
                                expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
                                if (t_info == TI_PGETLINE) {
                                        /* it's a pipe */
                                        next_token(TC_GETLINE);
                                        /* give maximum priority to this pipe */
                                        cn->info &= ~PRIMASK;
                                        expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
                                }
                        } else {
                                /* postfix unary: the operand goes to the right link */
                                cn->r.n = vn;
                                expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
                        }
                        vn->a.n = cn;
                        continue;
                }

                debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info);
                /* for operands and prefix-unary operators, attach them
                 * to last node */
                vn = cn;
                cn = vn->r.n = new_node(t_info);
                cn->a.n = vn;

                expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
                if (t_info == TI_PREINC || t_info == TI_PREDEC)
                        expected_tc = TS_LVALUE | TC_UOPPRE1;

                /* prefix operator: nothing more to do until its operand arrives */
                if (!(tc & (TS_OPERAND | TC_REGEXP)))
                        continue;

                debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__);
                expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc;
                /* one should be very careful with switch on tclass -
                 * only simple tclasses should be used (TC_xyz, not TS_xyz) */
                switch (tc) {
                case TC_VARIABLE:
                case TC_ARRAY:
                        debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
                        cn->info = OC_VAR;
                        /* names found in ahash become OC_FNARG references by
                         * index; all other names are looked up/created in vhash */
                        v = hash_search(ahash, t_string);
                        if (v != NULL) {
                                cn->info = OC_FNARG;
                                cn->l.aidx = v->x.aidx;
                        } else {
                                cn->l.v = newvar(t_string);
                        }
                        if (tc & TC_ARRAY) {
                                cn->info |= xS;
                                /* subscript expression, up to the array terminator */
                                cn->r.n = parse_expr(TC_ARRTERM);
                        }
                        break;

                case TC_NUMBER:
                case TC_STRING:
                        debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
                        cn->info = OC_VAR;
                        /* constants are stored in a freshly allocated anonymous var */
                        v = cn->l.v = xzalloc(sizeof(var));
                        if (tc & TC_NUMBER)
                                setvar_i(v, t_double);
                        else {
                                setvar_s(v, t_string);
                                expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */
                        }
                        break;

                case TC_REGEXP:
                        debug_printf_parse("%s: TC_REGEXP\n", __func__);
                        /* two regex_t: mk_re_node() compiles a plain and an ICASE version */
                        mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
                        break;

                case TC_FUNCTION:
                        debug_printf_parse("%s: TC_FUNCTION\n", __func__);
                        cn->info = OC_FUNC;
                        cn->r.f = newfunc(t_string);
                        /* next_token() already consumed the '(' after the name */
                        cn->l.n = parse_expr(TC_RPAREN);
                        break;

                case TC_LPAREN:
                        debug_printf_parse("%s: TC_LPAREN\n", __func__);
                        /* the parenthesized subtree replaces the node created above */
                        cn = vn->r.n = parse_expr(TC_RPAREN);
                        if (!cn)
                                syntax_error("Empty sequence");
                        cn->a.n = vn;
                        break;

                case TC_GETLINE:
                        debug_printf_parse("%s: TC_GETLINE\n", __func__);
                        /* remember the node: a following '<' is input redirection */
                        glptr = cn;
                        expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
                        break;

                case TC_BUILTIN:
                        debug_printf_parse("%s: TC_BUILTIN\n", __func__);
                        cn->l.n = parse_lrparen_list();
                        break;

                case TC_LENGTH:
                        debug_printf_parse("%s: TC_LENGTH\n", __func__);
                        tc = next_token(TC_LPAREN /* length(...) */
                                | TC_SEMICOL   /* length; */
                                | TC_NEWLINE   /* length<newline> */
                                | TC_RBRACE    /* length } */
                                | TC_BINOPX    /* length <op> NUM */
                                | TC_COMMA     /* print length, 1 */
                        );
                        if (tc != TC_LPAREN)
                                rollback_token();
                        else {
                                /* It was a "(" token. Handle just like TC_BUILTIN */
                                cn->l.n = parse_expr(TC_RPAREN);
                        }
                        break;
                }
        } /* while() */

        debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
        return sn.r.n;
}
1504
1505/* add node to chain. Return ptr to alloc'd node */
1506static node *chain_node(uint32_t info)
1507{
1508        node *n;
1509
1510        if (!seq->first)
1511                seq->first = seq->last = new_node(0);
1512
1513        if (seq->programname != g_progname) {
1514                seq->programname = g_progname;
1515                n = chain_node(OC_NEWSOURCE);
1516                n->l.new_progname = g_progname;
1517        }
1518
1519        n = seq->last;
1520        n->info = info;
1521        seq->last = n->a.n = new_node(OC_DONE);
1522
1523        return n;
1524}
1525
1526static void chain_expr(uint32_t info)
1527{
1528        node *n;
1529
1530        n = chain_node(info);
1531
1532        n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
1533        if ((info & OF_REQUIRED) && !n->l.n)
1534                syntax_error(EMSG_TOO_FEW_ARGS);
1535
1536        if (t_tclass & TC_RBRACE)
1537                rollback_token();
1538}
1539
1540static void chain_group(void);
1541
1542static node *chain_loop(node *nn)
1543{
1544        node *n, *n2, *save_brk, *save_cont;
1545
1546        save_brk = break_ptr;
1547        save_cont = continue_ptr;
1548
1549        n = chain_node(OC_BR | Vx);
1550        continue_ptr = new_node(OC_EXEC);
1551        break_ptr = new_node(OC_EXEC);
1552        chain_group();
1553        n2 = chain_node(OC_EXEC | Vx);
1554        n2->l.n = nn;
1555        n2->a.n = n;
1556        continue_ptr->a.n = n2;
1557        break_ptr->a.n = n->r.n = seq->last;
1558
1559        continue_ptr = save_cont;
1560        break_ptr = save_brk;
1561
1562        return n;
1563}
1564
1565static void chain_until_rbrace(void)
1566{
1567        uint32_t tc;
1568        while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
1569                debug_printf_parse("%s: !TC_RBRACE\n", __func__);
1570                if (tc == TC_NEWLINE)
1571                        continue;
1572                rollback_token();
1573                chain_group();
1574        }
1575        debug_printf_parse("%s: TC_RBRACE\n", __func__);
1576}
1577
1578/* parse group and attach it to chain */
1579static void chain_group(void)
1580{
1581        uint32_t tc;
1582        node *n, *n2, *n3;
1583
1584        do {
1585                tc = next_token(TS_GRPSEQ);
1586        } while (tc == TC_NEWLINE);
1587
1588        if (tc == TC_LBRACE) {
1589                debug_printf_parse("%s: TC_LBRACE\n", __func__);
1590                chain_until_rbrace();
1591                return;
1592        }
1593        if (tc & (TS_OPSEQ | TC_SEMICOL)) {
1594                debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL\n", __func__);
1595                rollback_token();
1596                chain_expr(OC_EXEC | Vx);
1597                return;
1598        }
1599
1600        /* TS_STATEMNT */
1601        debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__);
1602        switch (t_info & OPCLSMASK) {
1603        case ST_IF:
1604                debug_printf_parse("%s: ST_IF\n", __func__);
1605                n = chain_node(OC_BR | Vx);
1606                n->l.n = parse_lrparen_list();
1607                chain_group();
1608                n2 = chain_node(OC_EXEC);
1609                n->r.n = seq->last;
1610                if (next_token(TS_GRPSEQ | TC_RBRACE | TC_ELSE) == TC_ELSE) {
1611                        chain_group();
1612                        n2->a.n = seq->last;
1613                } else {
1614                        rollback_token();
1615                }
1616                break;
1617
1618        case ST_WHILE:
1619                debug_printf_parse("%s: ST_WHILE\n", __func__);
1620                n2 = parse_lrparen_list();
1621                n = chain_loop(NULL);
1622                n->l.n = n2;
1623                break;
1624
1625        case ST_DO:
1626                debug_printf_parse("%s: ST_DO\n", __func__);
1627                n2 = chain_node(OC_EXEC);
1628                n = chain_loop(NULL);
1629                n2->a.n = n->a.n;
1630                next_token(TC_WHILE);
1631                n->l.n = parse_lrparen_list();
1632                break;
1633
1634        case ST_FOR:
1635                debug_printf_parse("%s: ST_FOR\n", __func__);
1636                next_token(TC_LPAREN);
1637                n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
1638                if (t_tclass & TC_RPAREN) {     /* for (I in ARRAY) */
1639                        if (!n2 || n2->info != TI_IN)
1640                                syntax_error(EMSG_UNEXP_TOKEN);
1641                        n = chain_node(OC_WALKINIT | VV);
1642                        n->l.n = n2->l.n;
1643                        n->r.n = n2->r.n;
1644                        n = chain_loop(NULL);
1645                        n->info = OC_WALKNEXT | Vx;
1646                        n->l.n = n2->l.n;
1647                } else {                        /* for (;;) */
1648                        n = chain_node(OC_EXEC | Vx);
1649                        n->l.n = n2;
1650                        n2 = parse_expr(TC_SEMICOL);
1651                        n3 = parse_expr(TC_RPAREN);
1652                        n = chain_loop(n3);
1653                        n->l.n = n2;
1654                        if (!n2)
1655                                n->info = OC_EXEC;
1656                }
1657                break;
1658
1659        case OC_PRINT:
1660        case OC_PRINTF:
1661                debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1662                n = chain_node(t_info);
1663                n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_OUTRDR | TC_RBRACE);
1664                if (t_tclass & TC_OUTRDR) {
1665                        n->info |= t_info;
1666                        n->r.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
1667                }
1668                if (t_tclass & TC_RBRACE)
1669                        rollback_token();
1670                break;
1671
1672        case OC_BREAK:
1673                debug_printf_parse("%s: OC_BREAK\n", __func__);
1674                n = chain_node(OC_EXEC);
1675                if (!break_ptr)
1676                        syntax_error("'break' not in a loop");
1677                n->a.n = break_ptr;
1678                chain_expr(t_info);
1679                break;
1680
1681        case OC_CONTINUE:
1682                debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1683                n = chain_node(OC_EXEC);
1684                if (!continue_ptr)
1685                        syntax_error("'continue' not in a loop");
1686                n->a.n = continue_ptr;
1687                chain_expr(t_info);
1688                break;
1689
1690        /* delete, next, nextfile, return, exit */
1691        default:
1692                debug_printf_parse("%s: default\n", __func__);
1693                chain_expr(t_info);
1694        }
1695}
1696
1697static void parse_program(char *p)
1698{
1699        debug_printf_parse("%s()\n", __func__);
1700
1701        g_pos = p;
1702        t_lineno = 1;
1703        for (;;) {
1704                uint32_t tclass;
1705
1706                tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
1707                        | TC_EOF | TC_NEWLINE /* but not TC_SEMICOL */);
1708 got_tok:
1709                if (tclass == TC_EOF) {
1710                        debug_printf_parse("%s: TC_EOF\n", __func__);
1711                        break;
1712                }
1713                if (tclass == TC_NEWLINE) {
1714                        debug_printf_parse("%s: TC_NEWLINE\n", __func__);
1715                        continue;
1716                }
1717                if (tclass == TC_BEGIN) {
1718                        debug_printf_parse("%s: TC_BEGIN\n", __func__);
1719                        seq = &beginseq;
1720                        /* ensure there is no newline between BEGIN and { */
1721                        next_token(TC_LBRACE);
1722                        chain_until_rbrace();
1723                        goto next_tok;
1724                }
1725                if (tclass == TC_END) {
1726                        debug_printf_parse("%s: TC_END\n", __func__);
1727                        seq = &endseq;
1728                        /* ensure there is no newline between END and { */
1729                        next_token(TC_LBRACE);
1730                        chain_until_rbrace();
1731                        goto next_tok;
1732                }
1733                if (tclass == TC_FUNCDECL) {
1734                        func *f;
1735
1736                        debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1737                        next_token(TC_FUNCTION);
1738                        f = newfunc(t_string);
1739                        if (f->defined)
1740                                syntax_error("Duplicate function");
1741                        f->defined = 1;
1742                        //f->body.first = NULL; - already is
1743                        //f->nargs = 0; - already is
1744                        /* func arg list: comma sep list of args, and a close paren */
1745                        for (;;) {
1746                                var *v;
1747                                if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
1748                                        if (f->nargs == 0)
1749                                                break; /* func() is ok */
1750                                        /* func(a,) is not ok */
1751                                        syntax_error(EMSG_UNEXP_TOKEN);
1752                                }
1753                                v = findvar(ahash, t_string);
1754                                v->x.aidx = f->nargs++;
1755                                /* Arg followed either by end of arg list or 1 comma */
1756                                if (next_token(TC_COMMA | TC_RPAREN) == TC_RPAREN)
1757                                        break;
1758                                /* it was a comma, we ate it */
1759                        }
1760                        seq = &f->body;
1761                        /* ensure there is { after "func F(...)" - but newlines are allowed */
1762                        while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE)
1763                                continue;
1764                        chain_until_rbrace();
1765                        hash_clear(ahash);
1766                        goto next_tok;
1767                }
1768                seq = &mainseq;
1769                if (tclass & TS_OPSEQ) {
1770                        node *cn;
1771
1772                        debug_printf_parse("%s: TS_OPSEQ\n", __func__);
1773                        rollback_token();
1774                        cn = chain_node(OC_TEST);
1775                        cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE);
1776                        if (t_tclass == TC_LBRACE) {
1777                                debug_printf_parse("%s: TC_LBRACE\n", __func__);
1778                                chain_until_rbrace();
1779                        } else {
1780                                /* no action, assume default "{ print }" */
1781                                debug_printf_parse("%s: !TC_LBRACE\n", __func__);
1782                                chain_node(OC_PRINT);
1783                        }
1784                        cn->r.n = mainseq.last;
1785                        goto next_tok;
1786                }
1787                /* tclass == TC_LBRACE */
1788                debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
1789                chain_until_rbrace();
1790 next_tok:
1791                /* Same as next_token() at the top of the loop, + TC_SEMICOL */
1792                tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
1793                        | TC_EOF | TC_NEWLINE | TC_SEMICOL);
1794                /* gawk allows many newlines, but does not allow more than one semicolon:
1795                 *  BEGIN {...}<newline>;<newline>;
1796                 * would complain "each rule must have a pattern or an action part".
1797                 * Same message for
1798                 *  ; BEGIN {...}
1799                 */
1800                if (tclass != TC_SEMICOL)
1801                        goto got_tok; /* use this token */
1802                /* else: loop back - ate the semicolon, get and use _next_ token */
1803        } /* for (;;) */
1804}
1805
1806/* -------- program execution part -------- */
1807
1808/* temporary variables allocator */
1809static var *nvalloc(int sz)
1810{
1811        return xzalloc(sz * sizeof(var));
1812}
1813
1814static void nvfree(var *v, int sz)
1815{
1816        var *p = v;
1817
1818        while (--sz >= 0) {
1819                if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1820                        clear_array(iamarray(p));
1821                        free(p->x.array->items);
1822                        free(p->x.array);
1823                }
1824                if (p->type & VF_WALK) {
1825                        walker_list *n;
1826                        walker_list *w = p->x.walker;
1827                        debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1828                        p->x.walker = NULL;
1829                        while (w) {
1830                                n = w->prev;
1831                                debug_printf_walker(" free(%p)\n", w);
1832                                free(w);
1833                                w = n;
1834                        }
1835                }
1836                clrvar(p);
1837                p++;
1838        }
1839
1840        free(v);
1841}
1842
1843static node *mk_splitter(const char *s, tsplitter *spl)
1844{
1845        regex_t *re, *ire;
1846        node *n;
1847
1848        re = &spl->re[0];
1849        ire = &spl->re[1];
1850        n = &spl->n;
1851        if (n->info == TI_REGEXP) {
1852                regfree(re);
1853                regfree(ire); // TODO: nuke ire, use re+1?
1854        }
1855        if (s[0] && s[1]) { /* strlen(s) > 1 */
1856                mk_re_node(s, n, re);
1857        } else {
1858                n->info = (uint32_t) s[0];
1859        }
1860
1861        return n;
1862}
1863
1864static var *evaluate(node *, var *);
1865
1866/* Use node as a regular expression. Supplied with node ptr and regex_t
1867 * storage space. Return ptr to regex (if result points to preg, it should
1868 * be later regfree'd manually).
1869 */
1870static regex_t *as_regex(node *op, regex_t *preg)
1871{
1872        int cflags;
1873        const char *s;
1874
1875        if (op->info == TI_REGEXP) {
1876                return icase ? op->r.ire : op->l.re;
1877        }
1878
1879        //tmpvar = nvalloc(1);
1880#define TMPVAR (&G.as_regex__tmpvar)
1881        // We use a single "static" tmpvar (instead of on-stack or malloced one)
1882        // to decrease memory consumption in deeply-recursive awk programs.
1883        // The rule to work safely is to never call evaluate() while our static
1884        // TMPVAR's value is still needed.
1885        s = getvar_s(evaluate(op, TMPVAR));
1886
1887        cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1888        /* Testcase where REG_EXTENDED fails (unpaired '{'):
1889         * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1890         * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1891         * (maybe gsub is not supposed to use REG_EXTENDED?).
1892         */
1893        if (regcomp(preg, s, cflags)) {
1894                cflags &= ~REG_EXTENDED;
1895                xregcomp(preg, s, cflags);
1896        }
1897        //nvfree(tmpvar, 1);
1898#undef TMPVAR
1899        return preg;
1900}
1901
/*
 * Gradually growing buffer.
 *
 * Returns 'b' unchanged while it exists and n < *size. Otherwise the
 * buffer is reallocated to n + n/2 + 80 bytes and *size is updated.
 * Note that we reallocate even if n == old size, thus there is always
 * at least one allocated byte beyond index n-1.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b != NULL && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1914
1915/* resize field storage space */
1916static void fsrealloc(int size)
1917{
1918        int i, newsize;
1919
1920        if (size >= maxfields) {
1921                /* Sanity cap, easier than catering for overflows */
1922                if (size > 0xffffff)
1923                        bb_die_memory_exhausted();
1924
1925                i = maxfields;
1926                maxfields = size + 16;
1927
1928                newsize = maxfields * sizeof(Fields[0]);
1929                debug_printf_eval("fsrealloc: xrealloc(%p, %u)\n", Fields, newsize);
1930                Fields = xrealloc(Fields, newsize);
1931                debug_printf_eval("fsrealloc: Fields=%p..%p\n", Fields, (char*)Fields + newsize - 1);
1932                /* ^^^ did Fields[] move? debug aid for L.v getting "upstaged" by R.v in evaluate() */
1933
1934                for (; i < maxfields; i++) {
1935                        Fields[i].type = VF_SPECIAL;
1936                        Fields[i].string = NULL;
1937                }
1938        }
1939        /* if size < nfields, clear extra field variables */
1940        for (i = size; i < nfields; i++) {
1941                clrvar(Fields + i);
1942        }
1943        nfields = size;
1944}
1945
/*
 * Like regexec(preg, s, 1, pmatch, 0), but never reports a match which
 * ends at offset 0 (i.e. an empty match at the very start of 's').
 *
 * If the first match ends at offset 0, the search is retried from s+1,
 * s+2, ... until a match with a nonzero end offset is found; its offsets
 * are then converted to be relative to 's'. If the end of the string is
 * reached first, REG_NOMATCH is returned.
 *
 * Returns 0 on match (pmatch[0] is valid), nonzero otherwise.
 */
static int regexec1_nonempty(const regex_t *preg, const char *s, regmatch_t pmatch[])
{
	int r = regexec(preg, s, 1, pmatch, 0);
	if (r == 0 && pmatch[0].rm_eo == 0) {
		/* For example, happens when FS can match
		 * an empty string (awk -F ' *'). Logically,
		 * this should split into one-char fields.
		 * However, gawk 5.0.1 searches for first
		 * _non-empty_ separator string match:
		 */
		size_t ofs = 0;
		do {
			ofs++;
			if (!s[ofs])
				return REG_NOMATCH;
			if (regexec(preg, s + ofs, 1, pmatch, 0) != 0) {
				/* No match from this offset. Do not look at
				 * whatever regexec() left in pmatch[]:
				 * explicitly request a retry from the next
				 * offset.
				 */
				pmatch[0].rm_eo = 0;
			}
		} while (pmatch[0].rm_eo == 0);
		pmatch[0].rm_so += ofs;
		pmatch[0].rm_eo += ofs;
	}
	return r;
}
1968
1969static int awk_split(const char *s, node *spl, char **slist)
1970{
1971        int n;
1972        char c[4];
1973        char *s1;
1974
1975        /* in worst case, each char would be a separate field */
1976        *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1977        strcpy(s1, s);
1978
1979        c[0] = c[1] = (char)spl->info;
1980        c[2] = c[3] = '\0';
1981        if (*getvar_s(intvar[RS]) == '\0')
1982                c[2] = '\n';
1983
1984        n = 0;
1985        if (spl->info == TI_REGEXP) {  /* regex split */
1986                if (!*s)
1987                        return n; /* "": zero fields */
1988                n++; /* at least one field will be there */
1989                do {
1990                        int l;
1991                        regmatch_t pmatch[1];
1992
1993                        l = strcspn(s, c+2); /* len till next NUL or \n */
1994                        if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0
1995                         && pmatch[0].rm_so <= l
1996                        ) {
1997                                /* if (pmatch[0].rm_eo == 0) ... - impossible */
1998                                l = pmatch[0].rm_so;
1999                                n++; /* we saw yet another delimiter */
2000                        } else {
2001                                pmatch[0].rm_eo = l;
2002                                if (s[l])
2003                                        pmatch[0].rm_eo++;
2004                        }
2005                        s1 = mempcpy(s1, s, l);
2006                        *s1++ = '\0';
2007                        s += pmatch[0].rm_eo;
2008                } while (*s);
2009
2010                /* echo a-- | awk -F-- '{ print NF, length($NF), $NF }'
2011                 * should print "2 0 ":
2012                 */
2013                *s1 = '\0';
2014
2015                return n;
2016        }
2017        if (c[0] == '\0') {  /* null split */
2018                while (*s) {
2019                        *s1++ = *s++;
2020                        *s1++ = '\0';
2021                        n++;
2022                }
2023                return n;
2024        }
2025        if (c[0] != ' ') {  /* single-character split */
2026                if (icase) {
2027                        c[0] = toupper(c[0]);
2028                        c[1] = tolower(c[1]);
2029                }
2030                if (*s1)
2031                        n++;
2032                while ((s1 = strpbrk(s1, c)) != NULL) {
2033                        *s1++ = '\0';
2034                        n++;
2035                }
2036                return n;
2037        }
2038        /* space split */
2039        while (*s) {
2040                s = skip_whitespace(s);
2041                if (!*s)
2042                        break;
2043                n++;
2044                while (*s && !isspace(*s))
2045                        *s1++ = *s++;
2046                *s1++ = '\0';
2047        }
2048        return n;
2049}
2050
2051static void split_f0(void)
2052{
2053/* static char *fstrings; */
2054#define fstrings (G.split_f0__fstrings)
2055
2056        int i, n;
2057        char *s;
2058
2059        if (is_f0_split)
2060                return;
2061
2062        is_f0_split = TRUE;
2063        free(fstrings);
2064        fsrealloc(0);
2065        n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
2066        fsrealloc(n);
2067        s = fstrings;
2068        for (i = 0; i < n; i++) {
2069                Fields[i].string = nextword(&s);
2070                Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
2071        }
2072
2073        /* set NF manually to avoid side effects */
2074        clrvar(intvar[NF]);
2075        intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
2076        intvar[NF]->number = nfields;
2077#undef fstrings
2078}
2079
2080/* perform additional actions when some internal variables changed */
2081static void handle_special(var *v)
2082{
2083        int n;
2084        char *b;
2085        const char *sep, *s;
2086        int sl, l, len, i, bsize;
2087
2088        if (!(v->type & VF_SPECIAL))
2089                return;
2090
2091        if (v == intvar[NF]) {
2092                n = (int)getvar_i(v);
2093                if (n < 0)
2094                        syntax_error("NF set to negative value");
2095                fsrealloc(n);
2096
2097                /* recalculate $0 */
2098                sep = getvar_s(intvar[OFS]);
2099                sl = strlen(sep);
2100                b = NULL;
2101                len = 0;
2102                for (i = 0; i < n; i++) {
2103                        s = getvar_s(&Fields[i]);
2104                        l = strlen(s);
2105                        if (b) {
2106                                memcpy(b+len, sep, sl);
2107                                len += sl;
2108                        }
2109                        b = qrealloc(b, len+l+sl, &bsize);
2110                        memcpy(b+len, s, l);
2111                        len += l;
2112                }
2113                if (b)
2114                        b[len] = '\0';
2115                setvar_p(intvar[F0], b);
2116                is_f0_split = TRUE;
2117
2118        } else if (v == intvar[F0]) {
2119                is_f0_split = FALSE;
2120
2121        } else if (v == intvar[FS]) {
2122                /*
2123                 * The POSIX-2008 standard says that changing FS should have no effect on the
2124                 * current input line, but only on the next one. The language is:
2125                 *
2126                 * > Before the first reference to a field in the record is evaluated, the record
2127                 * > shall be split into fields, according to the rules in Regular Expressions,
2128                 * > using the value of FS that was current at the time the record was read.
2129                 *
2130                 * So, split up current line before assignment to FS:
2131                 */
2132                split_f0();
2133
2134                mk_splitter(getvar_s(v), &fsplitter);
2135        } else if (v == intvar[RS]) {
2136                mk_splitter(getvar_s(v), &rsplitter);
2137        } else if (v == intvar[IGNORECASE]) {
2138                icase = istrue(v);
2139        } else {                                /* $n */
2140                n = getvar_i(intvar[NF]);
2141                setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
2142                /* right here v is invalid. Just to note... */
2143        }
2144}
2145
2146/* step through func/builtin/etc arguments */
2147static node *nextarg(node **pn)
2148{
2149        node *n;
2150
2151        n = *pn;
2152        if (n && n->info == TI_COMMA) {
2153                *pn = n->r.n;
2154                n = n->l.n;
2155        } else {
2156                *pn = NULL;
2157        }
2158        return n;
2159}
2160
2161static void hashwalk_init(var *v, xhash *array)
2162{
2163        hash_item *hi;
2164        unsigned i;
2165        walker_list *w;
2166        walker_list *prev_walker;
2167
2168        if (v->type & VF_WALK) {
2169                prev_walker = v->x.walker;
2170        } else {
2171                v->type |= VF_WALK;
2172                prev_walker = NULL;
2173        }
2174        debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
2175
2176        w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
2177        debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
2178        w->cur = w->end = w->wbuf;
2179        w->prev = prev_walker;
2180        for (i = 0; i < array->csize; i++) {
2181                hi = array->items[i];
2182                while (hi) {
2183                        w->end = stpcpy(w->end, hi->name) + 1;
2184                        hi = hi->next;
2185                }
2186        }
2187}
2188
2189static int hashwalk_next(var *v)
2190{
2191        walker_list *w = v->x.walker;
2192
2193        if (w->cur >= w->end) {
2194                walker_list *prev_walker = w->prev;
2195
2196                debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
2197                free(w);
2198                v->x.walker = prev_walker;
2199                return FALSE;
2200        }
2201
2202        setvar_s(v, nextword(&w->cur));
2203        return TRUE;
2204}
2205
2206/* evaluate node, return 1 when result is true, 0 otherwise */
2207static int ptest(node *pattern)
2208{
2209        // We use a single "static" tmpvar (instead of on-stack or malloced one)
2210        // to decrease memory consumption in deeply-recursive awk programs.
2211        // The rule to work safely is to never call evaluate() while our static
2212        // TMPVAR's value is still needed.
2213        return istrue(evaluate(pattern, &G.ptest__tmpvar));
2214}
2215
2216/* read next record from stream rsm into a variable v */
2217static int awk_getline(rstream *rsm, var *v)
2218{
2219        char *b;
2220        regmatch_t pmatch[1];
2221        int size, a, p, pp = 0;
2222        int fd, so, eo, r, rp;
2223        char c, *m, *s;
2224
2225        debug_printf_eval("entered %s()\n", __func__);
2226
2227        /* we're using our own buffer since we need access to accumulating
2228         * characters
2229         */
2230        fd = fileno(rsm->F);
2231        m = rsm->buffer;
2232        a = rsm->adv;
2233        p = rsm->pos;
2234        size = rsm->size;
2235        c = (char) rsplitter.n.info;
2236        rp = 0;
2237
2238        if (!m)
2239                m = qrealloc(m, 256, &size);
2240
2241        do {
2242                b = m + a;
2243                so = eo = p;
2244                r = 1;
2245                if (p > 0) {
2246                        if (rsplitter.n.info == TI_REGEXP) {
2247                                if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
2248                                                        b, 1, pmatch, 0) == 0) {
2249                                        so = pmatch[0].rm_so;
2250                                        eo = pmatch[0].rm_eo;
2251                                        if (b[eo] != '\0')
2252                                                break;
2253                                }
2254                        } else if (c != '\0') {
2255                                s = strchr(b+pp, c);
2256                                if (!s)
2257                                        s = memchr(b+pp, '\0', p - pp);
2258                                if (s) {
2259                                        so = eo = s-b;
2260                                        eo++;
2261                                        break;
2262                                }
2263                        } else {
2264                                while (b[rp] == '\n')
2265                                        rp++;
2266                                s = strstr(b+rp, "\n\n");
2267                                if (s) {
2268                                        so = eo = s-b;
2269                                        while (b[eo] == '\n')
2270                                                eo++;
2271                                        if (b[eo] != '\0')
2272                                                break;
2273                                }
2274                        }
2275                }
2276
2277                if (a > 0) {
2278                        memmove(m, m+a, p+1);
2279                        b = m;
2280                        a = 0;
2281                }
2282
2283                m = qrealloc(m, a+p+128, &size);
2284                b = m + a;
2285                pp = p;
2286                p += safe_read(fd, b+p, size-p-1);
2287                if (p < pp) {
2288                        p = 0;
2289                        r = 0;
2290                        setvar_i(intvar[ERRNO], errno);
2291                }
2292                b[p] = '\0';
2293
2294        } while (p > pp);
2295
2296        if (p == 0) {
2297                r--;
2298        } else {
2299                c = b[so]; b[so] = '\0';
2300                setvar_s(v, b+rp);
2301                v->type |= VF_USER;
2302                b[so] = c;
2303                c = b[eo]; b[eo] = '\0';
2304                setvar_s(intvar[RT], b+so);
2305                b[eo] = c;
2306        }
2307
2308        rsm->buffer = m;
2309        rsm->adv = a + eo;
2310        rsm->pos = p - eo;
2311        rsm->size = size;
2312
2313        debug_printf_eval("returning from %s(): %d\n", __func__, r);
2314
2315        return r;
2316}
2317
2318/* formatted output into an allocated buffer, return ptr to buffer */
2319#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS
2320# define awk_printf(a, b) awk_printf(a)
2321#endif
2322static char *awk_printf(node *n, size_t *len)
2323{
2324        char *b;
2325        char *fmt, *f;
2326        size_t i;
2327
2328        //tmpvar = nvalloc(1);
2329#define TMPVAR (&G.awk_printf__tmpvar)
2330        // We use a single "static" tmpvar (instead of on-stack or malloced one)
2331        // to decrease memory consumption in deeply-recursive awk programs.
2332        // The rule to work safely is to never call evaluate() while our static
2333        // TMPVAR's value is still needed.
2334        fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), TMPVAR)));
2335        // ^^^^^^^^^ here we immediately strdup() the value, so the later call
2336        // to evaluate() potentially recursing into another awk_printf() can't
2337        // mangle the value.
2338
2339        b = NULL;
2340        i = 0;
2341        while (*f) { /* "print one format spec" loop */
2342                char *s;
2343                char c;
2344                char sv;
2345                var *arg;
2346                size_t slen;
2347
2348                s = f;
2349                while (*f && (*f != '%' || *++f == '%'))
2350                        f++;
2351                while (*f && !isalpha(*f)) {
2352                        if (*f == '*')
2353                                syntax_error("%*x formats are not supported");
2354                        f++;
2355                }
2356                c = *f;
2357                if (!c) {
2358                        /* Tail of fmt with no percent chars,
2359                         * or "....%" (percent seen, but no format specifier char found)
2360                         */
2361                        slen = strlen(s);
2362                        goto tail;
2363                }
2364                sv = *++f;
2365                *f = '\0';
2366                arg = evaluate(nextarg(&n), TMPVAR);
2367
2368                /* Result can be arbitrarily long. Example:
2369                 *  printf "%99999s", "BOOM"
2370                 */
2371                if (c == 'c') {
2372                        char cc = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg);
2373                        char *r = xasprintf(s, cc ? cc : '^' /* else strlen will be wrong */);
2374                        slen = strlen(r);
2375                        if (cc == '\0') /* if cc is NUL, re-format the string with it */
2376                                sprintf(r, s, cc);
2377                        s = r;
2378                } else {
2379                        if (c == 's') {
2380                                s = xasprintf(s, getvar_s(arg));
2381                        } else {
2382                                double d = getvar_i(arg);
2383                                if (strchr("diouxX", c)) {
2384//TODO: make it wider here (%x -> %llx etc)?
2385                                        s = xasprintf(s, (int)d);
2386                                } else if (strchr("eEfFgGaA", c)) {
2387                                        s = xasprintf(s, d);
2388                                } else {
2389                                        syntax_error(EMSG_INV_FMT);
2390                                }
2391                        }
2392                        slen = strlen(s);
2393                }
2394                *f = sv;
2395
2396                if (i == 0) {
2397                        b = s;
2398                        i = slen;
2399                        continue;
2400                }
2401 tail:
2402                b = xrealloc(b, i + slen + 1);
2403                strcpy(b + i, s);
2404                i += slen;
2405                if (!c) /* tail? */
2406                        break;
2407                free(s);
2408        }
2409
2410        free(fmt);
2411        //nvfree(tmpvar, 1);
2412#undef TMPVAR
2413
2414#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
2415        if (len)
2416                *len = i;
2417#endif
2418        return b;
2419}
2420
2421/* Common substitution routine.
2422 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2423 * store result into (dest), return number of substitutions.
2424 * If nm = 0, replace all matches.
2425 * If src or dst is NULL, use $0.
2426 * If subexp != 0, enable subexpression matching (\1-\9).
2427 */
2428static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2429{
2430        char *resbuf;
2431        const char *sp;
2432        int match_no, residx, replen, resbufsize;
2433        int regexec_flags;
2434        regmatch_t pmatch[10];
2435        regex_t sreg, *regex;
2436
2437        resbuf = NULL;
2438        residx = 0;
2439        match_no = 0;
2440        regexec_flags = 0;
2441        regex = as_regex(rn, &sreg);
2442        sp = getvar_s(src ? src : intvar[F0]);
2443        replen = strlen(repl);
2444        while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2445                int so = pmatch[0].rm_so;
2446                int eo = pmatch[0].rm_eo;
2447
2448                //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2449                resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2450                memcpy(resbuf + residx, sp, eo);
2451                residx += eo;
2452                if (++match_no >= nm) {
2453                        const char *s;
2454                        int nbs;
2455
2456                        /* replace */
2457                        residx -= (eo - so);
2458                        nbs = 0;
2459                        for (s = repl; *s; s++) {
2460                                char c = resbuf[residx++] = *s;
2461                                if (c == '\\') {
2462                                        nbs++;
2463                                        continue;
2464                                }
2465                                if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2466                                        int j;
2467                                        residx -= ((nbs + 3) >> 1);
2468                                        j = 0;
2469                                        if (c != '&') {
2470                                                j = c - '0';
2471                                                nbs++;
2472                                        }
2473                                        if (nbs % 2) {
2474                                                resbuf[residx++] = c;
2475                                        } else {
2476                                                int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2477                                                resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2478                                                memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2479                                                residx += n;
2480                                        }
2481                                }
2482                                nbs = 0;
2483                        }
2484                }
2485
2486                regexec_flags = REG_NOTBOL;
2487                sp += eo;
2488                if (match_no == nm)
2489                        break;
2490                if (eo == so) {
2491                        /* Empty match (e.g. "b*" will match anywhere).
2492                         * Advance by one char. */
2493//BUG (bug 1333):
2494//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2495//... and will erroneously match "b" even though it is NOT at the word start.
2496//we need REG_NOTBOW but it does not exist...
2497//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2498//it should be able to do it correctly.
2499                        /* Subtle: this is safe only because
2500                         * qrealloc allocated at least one extra byte */
2501                        resbuf[residx] = *sp;
2502                        if (*sp == '\0')
2503                                goto ret;
2504                        sp++;
2505                        residx++;
2506                }
2507        }
2508
2509        resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2510        strcpy(resbuf + residx, sp);
2511 ret:
2512        //bb_error_msg("end sp:'%s'%p", sp,sp);
2513        setvar_p(dest ? dest : intvar[F0], resbuf);
2514        if (regex == &sreg)
2515                regfree(regex);
2516        return match_no;
2517}
2518
/* Convert a "YYYY MM DD HH MM SS [DST]" string to a timestamp.
 * At least the first six numbers must be present. A month of 0 and
 * a year in the range 0..1899 are rejected.
 * Returns the mktime() result, or -1 if the string is rejected.
 */
static NOINLINE int do_mktime(const char *ds)
{
	struct tm when;
	int parsed;

	/* The struct is deliberately not zeroed beforehand */
	when.tm_isdst = -1; /* "unknown" unless the optional 7th number is given */

	/* manpage of mktime says these fields are ints,
	 * so sscanf can store straight into them */
	parsed = sscanf(ds, "%u %u %u %u %u %u %d",
		&when.tm_year, &when.tm_mon, &when.tm_mday,
		&when.tm_hour, &when.tm_min, &when.tm_sec,
		&when.tm_isdst);

	if (parsed < 6)
		return -1;
	if (when.tm_mon == 0)
		return -1;
	if (when.tm_year >= 0 && when.tm_year < 1900)
		return -1;

	/* struct tm counts years from 1900 and months from 0 */
	when.tm_year -= 1900;
	when.tm_mon -= 1;

	return mktime(&when);
}
2546
2547/* Reduce stack usage in exec_builtin() by keeping match() code separate */
2548static NOINLINE var *do_match(node *an1, const char *as0)
2549{
2550        regmatch_t pmatch[1];
2551        regex_t sreg, *re;
2552        int n, start, len;
2553
2554        re = as_regex(an1, &sreg);
2555        n = regexec(re, as0, 1, pmatch, 0);
2556        if (re == &sreg)
2557                regfree(re);
2558        start = 0;
2559        len = -1;
2560        if (n == 0) {
2561                start = pmatch[0].rm_so + 1;
2562                len = pmatch[0].rm_eo - pmatch[0].rm_so;
2563        }
2564        setvar_i(newvar("RLENGTH"), len);
2565        return setvar_i(newvar("RSTART"), start);
2566}
2567
2568/* Reduce stack usage in evaluate() by keeping builtins' code separate */
2569static NOINLINE var *exec_builtin(node *op, var *res)
2570{
2571#define tspl (G.exec_builtin__tspl)
2572
2573        var *tmpvars;
2574        node *an[4];
2575        var *av[4];
2576        const char *as[4];
2577        node *spl;
2578        uint32_t isr, info;
2579        int nargs;
2580        time_t tt;
2581        int i, l, ll, n;
2582
2583        tmpvars = nvalloc(4);
2584#define TMPVAR0 (tmpvars)
2585#define TMPVAR1 (tmpvars + 1)
2586#define TMPVAR2 (tmpvars + 2)
2587#define TMPVAR3 (tmpvars + 3)
2588#define TMPVAR(i) (tmpvars + (i))
2589        isr = info = op->info;
2590        op = op->l.n;
2591
2592        av[2] = av[3] = NULL;
2593        for (i = 0; i < 4 && op; i++) {
2594                an[i] = nextarg(&op);
2595                if (isr & 0x09000000) {
2596                        av[i] = evaluate(an[i], TMPVAR(i));
2597                        if (isr & 0x08000000)
2598                                as[i] = getvar_s(av[i]);
2599                }
2600                isr >>= 1;
2601        }
2602
2603        nargs = i;
2604        if ((uint32_t)nargs < (info >> 30))
2605                syntax_error(EMSG_TOO_FEW_ARGS);
2606
2607        info &= OPNMASK;
2608        switch (info) {
2609
2610        case B_a2:
2611                if (ENABLE_FEATURE_AWK_LIBM)
2612                        setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2613                else
2614                        syntax_error(EMSG_NO_MATH);
2615                break;
2616
2617        case B_sp: {
2618                char *s, *s1;
2619
2620                if (nargs > 2) {
2621                        spl = (an[2]->info == TI_REGEXP) ? an[2]
2622                                : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl);
2623                } else {
2624                        spl = &fsplitter.n;
2625                }
2626
2627                n = awk_split(as[0], spl, &s);
2628                s1 = s;
2629                clear_array(iamarray(av[1]));
2630                for (i = 1; i <= n; i++)
2631                        setari_u(av[1], i, nextword(&s));
2632                free(s1);
2633                setvar_i(res, n);
2634                break;
2635        }
2636
2637        case B_ss: {
2638                char *s;
2639
2640                l = strlen(as[0]);
2641                i = getvar_i(av[1]) - 1;
2642                if (i > l)
2643                        i = l;
2644                if (i < 0)
2645                        i = 0;
2646                n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2647                if (n < 0)
2648                        n = 0;
2649                s = xstrndup(as[0]+i, n);
2650                setvar_p(res, s);
2651                break;
2652        }
2653
2654        /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2655         * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2656        case B_an:
2657                setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2658                break;
2659
2660        case B_co:
2661                setvar_i(res, ~getvar_i_int(av[0]));
2662                break;
2663
2664        case B_ls:
2665                setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2666                break;
2667
2668        case B_or:
2669                setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2670                break;
2671
2672        case B_rs:
2673                setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2674                break;
2675
2676        case B_xo:
2677                setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2678                break;
2679
2680        case B_lo:
2681        case B_up: {
2682                char *s, *s1;
2683                s1 = s = xstrdup(as[0]);
2684                while (*s1) {
2685                        //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2686                        if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2687                                *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2688                        s1++;
2689                }
2690                setvar_p(res, s);
2691                break;
2692        }
2693
2694        case B_ix:
2695                n = 0;
2696                ll = strlen(as[1]);
2697                l = strlen(as[0]) - ll;
2698                if (ll > 0 && l >= 0) {
2699                        if (!icase) {
2700                                char *s = strstr(as[0], as[1]);
2701                                if (s)
2702                                        n = (s - as[0]) + 1;
2703                        } else {
2704                                /* this piece of code is terribly slow and
2705                                 * really should be rewritten
2706                                 */
2707                                for (i = 0; i <= l; i++) {
2708                                        if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2709                                                n = i+1;
2710                                                break;
2711                                        }
2712                                }
2713                        }
2714                }
2715                setvar_i(res, n);
2716                break;
2717
2718        case B_ti:
2719                if (nargs > 1)
2720                        tt = getvar_i(av[1]);
2721                else
2722                        time(&tt);
2723                //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2724                i = strftime(g_buf, MAXVARFMT,
2725                        ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2726                        localtime(&tt));
2727                g_buf[i] = '\0';
2728                setvar_s(res, g_buf);
2729                break;
2730
2731        case B_mt:
2732                setvar_i(res, do_mktime(as[0]));
2733                break;
2734
2735        case B_ma:
2736                res = do_match(an[1], as[0]);
2737                break;
2738
2739        case B_ge:
2740                awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2741                break;
2742
2743        case B_gs:
2744                setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2745                break;
2746
2747        case B_su:
2748                setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2749                break;
2750        }
2751
2752        nvfree(tmpvars, 4);
2753#undef TMPVAR0
2754#undef TMPVAR1
2755#undef TMPVAR2
2756#undef TMPVAR3
2757#undef TMPVAR
2758
2759        return res;
2760#undef tspl
2761}
2762
2763/* if expr looks like "var=value", perform assignment and return 1,
2764 * otherwise return 0 */
2765static int is_assignment(const char *expr)
2766{
2767        char *exprc, *val;
2768
2769        val = (char*)endofname(expr);
2770        if (val == (char*)expr || *val != '=') {
2771                return FALSE;
2772        }
2773
2774        exprc = xstrdup(expr);
2775        val = exprc + (val - expr);
2776        *val++ = '\0';
2777
2778        unescape_string_in_place(val);
2779        setvar_u(newvar(exprc), val);
2780        free(exprc);
2781        return TRUE;
2782}
2783
2784/* switch to next input file */
2785static rstream *next_input_file(void)
2786{
2787#define rsm          (G.next_input_file__rsm)
2788#define files_happen (G.next_input_file__files_happen)
2789
2790        const char *fname, *ind;
2791
2792        if (rsm.F)
2793                fclose(rsm.F);
2794        rsm.F = NULL;
2795        rsm.pos = rsm.adv = 0;
2796
2797        for (;;) {
2798                if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2799                        if (files_happen)
2800                                return NULL;
2801                        fname = "-";
2802                        rsm.F = stdin;
2803                        break;
2804                }
2805                ind = getvar_s(incvar(intvar[ARGIND]));
2806                fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2807                if (fname && *fname && !is_assignment(fname)) {
2808                        rsm.F = xfopen_stdin(fname);
2809                        break;
2810                }
2811        }
2812
2813        files_happen = TRUE;
2814        setvar_s(intvar[FILENAME], fname);
2815        return &rsm;
2816#undef rsm
2817#undef files_happen
2818}
2819
2820/*
2821 * Evaluate node - the heart of the program. Supplied with subtree
2822 * and "res" variable to assign the result to if we evaluate an expression.
2823 * If node refers to e.g. a variable or a field, no assignment happens.
2824 * Return ptr to the result (which may or may not be the "res" variable!)
2825 */
2826#define XC(n) ((n) >> 8)
2827
2828static var *evaluate(node *op, var *res)
2829{
2830/* This procedure is recursive so we should count every byte */
2831#define fnargs (G.evaluate__fnargs)
2832/* seed is initialized to 1 */
2833#define seed   (G.evaluate__seed)
2834#define sreg   (G.evaluate__sreg)
2835
2836        var *tmpvars;
2837
2838        if (!op)
2839                return setvar_s(res, NULL);
2840
2841        debug_printf_eval("entered %s()\n", __func__);
2842
2843        tmpvars = nvalloc(2);
2844#define TMPVAR0 (tmpvars)
2845#define TMPVAR1 (tmpvars + 1)
2846
2847        while (op) {
2848                struct {
2849                        var *v;
2850                        const char *s;
2851                } L = L; /* for compiler */
2852                struct {
2853                        var *v;
2854                        const char *s;
2855                } R = R;
2856                double L_d = L_d;
2857                uint32_t opinfo;
2858                int opn;
2859                node *op1;
2860
2861                opinfo = op->info;
2862                opn = (opinfo & OPNMASK);
2863                g_lineno = op->lineno;
2864                op1 = op->l.n;
2865                debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2866
2867                /* execute inevitable things */
2868                if (opinfo & OF_RES1) {
2869                        if ((opinfo & OF_REQUIRED) && !op1)
2870                                syntax_error(EMSG_TOO_FEW_ARGS);
2871                        L.v = evaluate(op1, TMPVAR0);
2872                        if (opinfo & OF_STR1) {
2873                                L.s = getvar_s(L.v);
2874                                debug_printf_eval("L.s:'%s'\n", L.s);
2875                        }
2876                        if (opinfo & OF_NUM1) {
2877                                L_d = getvar_i(L.v);
2878                                debug_printf_eval("L_d:%f\n", L_d);
2879                        }
2880                }
2881                /* NB: Must get string/numeric values of L (done above)
2882                 * _before_ evaluate()'ing R.v: if both L and R are $NNNs,
2883                 * and right one is large, then L.v points to Fields[NNN1],
2884                 * second evaluate() reallocates and moves (!) Fields[],
2885                 * R.v points to Fields[NNN2] but L.v now points to freed mem!
2886                 * (Seen trying to evaluate "$444 $44444")
2887                 */
2888                if (opinfo & OF_RES2) {
2889                        R.v = evaluate(op->r.n, TMPVAR1);
2890                        //TODO: L.v may be invalid now, set L.v to NULL to catch bugs?
2891                        //L.v = NULL;
2892                        if (opinfo & OF_STR2) {
2893                                R.s = getvar_s(R.v);
2894                                debug_printf_eval("R.s:'%s'\n", R.s);
2895                        }
2896                }
2897
2898                debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2899                switch (XC(opinfo & OPCLSMASK)) {
2900
2901                /* -- iterative node type -- */
2902
2903                /* test pattern */
2904                case XC( OC_TEST ):
2905                        debug_printf_eval("TEST\n");
2906                        if (op1->info == TI_COMMA) {
2907                                /* it's range pattern */
2908                                if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2909                                        op->info |= OF_CHECKED;
2910                                        if (ptest(op1->r.n))
2911                                                op->info &= ~OF_CHECKED;
2912                                        op = op->a.n;
2913                                } else {
2914                                        op = op->r.n;
2915                                }
2916                        } else {
2917                                op = ptest(op1) ? op->a.n : op->r.n;
2918                        }
2919                        break;
2920
2921                /* just evaluate an expression, also used as unconditional jump */
2922                case XC( OC_EXEC ):
2923                        debug_printf_eval("EXEC\n");
2924                        break;
2925
2926                /* branch, used in if-else and various loops */
2927                case XC( OC_BR ):
2928                        debug_printf_eval("BR\n");
2929                        op = istrue(L.v) ? op->a.n : op->r.n;
2930                        break;
2931
2932                /* initialize for-in loop */
2933                case XC( OC_WALKINIT ):
2934                        debug_printf_eval("WALKINIT\n");
2935                        hashwalk_init(L.v, iamarray(R.v));
2936                        break;
2937
2938                /* get next array item */
2939                case XC( OC_WALKNEXT ):
2940                        debug_printf_eval("WALKNEXT\n");
2941                        op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2942                        break;
2943
2944                case XC( OC_PRINT ):
2945                        debug_printf_eval("PRINT /\n");
2946                case XC( OC_PRINTF ):
2947                        debug_printf_eval("PRINTF\n");
2948                {
2949                        FILE *F = stdout;
2950
2951                        if (op->r.n) {
2952                                rstream *rsm = newfile(R.s);
2953                                if (!rsm->F) {
2954                                        if (opn == '|') {
2955                                                rsm->F = popen(R.s, "w");
2956                                                if (rsm->F == NULL)
2957                                                        bb_simple_perror_msg_and_die("popen");
2958                                                rsm->is_pipe = 1;
2959                                        } else {
2960                                                rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2961                                        }
2962                                }
2963                                F = rsm->F;
2964                        }
2965
2966                        /* Can't just check 'opinfo == OC_PRINT' here, parser ORs
2967                         * additional bits to opinfos of print/printf with redirects
2968                         */
2969                        if ((opinfo & OPCLSMASK) == OC_PRINT) {
2970                                if (!op1) {
2971                                        fputs(getvar_s(intvar[F0]), F);
2972                                } else {
2973                                        for (;;) {
2974                                                var *v = evaluate(nextarg(&op1), TMPVAR0);
2975                                                if (v->type & VF_NUMBER) {
2976                                                        fmt_num(getvar_s(intvar[OFMT]),
2977                                                                        getvar_i(v));
2978                                                        fputs(g_buf, F);
2979                                                } else {
2980                                                        fputs(getvar_s(v), F);
2981                                                }
2982                                                if (!op1)
2983                                                        break;
2984                                                fputs(getvar_s(intvar[OFS]), F);
2985                                        }
2986                                }
2987                                fputs(getvar_s(intvar[ORS]), F);
2988                        } else {        /* PRINTF */
2989                                IF_FEATURE_AWK_GNU_EXTENSIONS(size_t len;)
2990                                char *s = awk_printf(op1, &len);
2991#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
2992                                fwrite(s, len, 1, F);
2993#else
2994                                fputs(s, F);
2995#endif
2996                                free(s);
2997                        }
2998                        fflush(F);
2999                        break;
3000                }
3001
3002                case XC( OC_DELETE ):
3003                        debug_printf_eval("DELETE\n");
3004                {
3005                        /* "delete" is special:
3006                         * "delete array[var--]" must evaluate index expr only once.
3007                         */
3008                        uint32_t info = op1->info & OPCLSMASK;
3009                        var *v;
3010
3011                        if (info == OC_VAR) {
3012                                v = op1->l.v;
3013                        } else if (info == OC_FNARG) {
3014                                v = &fnargs[op1->l.aidx];
3015                        } else {
3016                                syntax_error(EMSG_NOT_ARRAY);
3017                        }
3018                        if (op1->r.n) { /* array ref? */
3019                                const char *s;
3020                                s = getvar_s(evaluate(op1->r.n, TMPVAR0));
3021                                hash_remove(iamarray(v), s);
3022                        } else {
3023                                clear_array(iamarray(v));
3024                        }
3025                        break;
3026                }
3027
3028                case XC( OC_NEWSOURCE ):
3029                        debug_printf_eval("NEWSOURCE\n");
3030                        g_progname = op->l.new_progname;
3031                        break;
3032
3033                case XC( OC_RETURN ):
3034                        debug_printf_eval("RETURN\n");
3035                        copyvar(res, L.v);
3036                        break;
3037
3038                case XC( OC_NEXTFILE ):
3039                        debug_printf_eval("NEXTFILE\n");
3040                        nextfile = TRUE;
3041                case XC( OC_NEXT ):
3042                        debug_printf_eval("NEXT\n");
3043                        nextrec = TRUE;
3044                case XC( OC_DONE ):
3045                        debug_printf_eval("DONE\n");
3046                        clrvar(res);
3047                        break;
3048
3049                case XC( OC_EXIT ):
3050                        debug_printf_eval("EXIT\n");
3051                        if (op1)
3052                                G.exitcode = (int)L_d;
3053                        awk_exit();
3054
3055                /* -- recursive node type -- */
3056
3057                case XC( OC_VAR ):
3058                        debug_printf_eval("VAR\n");
3059                        L.v = op->l.v;
3060                        if (L.v == intvar[NF])
3061                                split_f0();
3062                        goto v_cont;
3063
3064                case XC( OC_FNARG ):
3065                        debug_printf_eval("FNARG[%d]\n", op->l.aidx);
3066                        L.v = &fnargs[op->l.aidx];
3067 v_cont:
3068                        res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
3069                        break;
3070
3071                case XC( OC_IN ):
3072                        debug_printf_eval("IN\n");
3073                        setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
3074                        break;
3075
3076                case XC( OC_REGEXP ):
3077                        debug_printf_eval("REGEXP\n");
3078                        op1 = op;
3079                        L.s = getvar_s(intvar[F0]);
3080                        goto re_cont;
3081
3082                case XC( OC_MATCH ):
3083                        debug_printf_eval("MATCH\n");
3084                        op1 = op->r.n;
3085 re_cont:
3086                        {
3087                                regex_t *re = as_regex(op1, &sreg);
3088                                int i = regexec(re, L.s, 0, NULL, 0);
3089                                if (re == &sreg)
3090                                        regfree(re);
3091                                setvar_i(res, (i == 0) ^ (opn == '!'));
3092                        }
3093                        break;
3094
3095                case XC( OC_MOVE ):
3096                        debug_printf_eval("MOVE\n");
3097                        /* if source is a temporary string, jusk relink it to dest */
3098                        if (R.v == TMPVAR1
3099                         && !(R.v->type & VF_NUMBER)
3100                                /* Why check !NUMBER? if R.v is a number but has cached R.v->string,
3101                                 * L.v ends up a string, which is wrong */
3102                         /*&& R.v->string - always not NULL (right?) */
3103                        ) {
3104                                res = setvar_p(L.v, R.v->string); /* avoids strdup */
3105                                R.v->string = NULL;
3106                        } else {
3107                                res = copyvar(L.v, R.v);
3108                        }
3109                        break;
3110
3111                case XC( OC_TERNARY ):
3112                        debug_printf_eval("TERNARY\n");
3113                        if (op->r.n->info != TI_COLON)
3114                                syntax_error(EMSG_POSSIBLE_ERROR);
3115                        res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
3116                        break;
3117
3118                case XC( OC_FUNC ): {
3119                        var *argvars, *sv_fnargs;
3120                        const char *sv_progname;
3121                        int nargs, i;
3122
3123                        debug_printf_eval("FUNC\n");
3124
3125                        if (!op->r.f->defined)
3126                                syntax_error(EMSG_UNDEF_FUNC);
3127
3128                        /* The body might be empty, still has to eval the args */
3129                        nargs = op->r.f->nargs;
3130                        argvars = nvalloc(nargs);
3131                        i = 0;
3132                        while (op1) {
3133                                var *arg = evaluate(nextarg(&op1), TMPVAR0);
3134                                if (i == nargs) {
3135                                        /* call with more arguments than function takes.
3136                                         * (gawk warns: "warning: function 'f' called with more arguments than declared").
3137                                         * They are still evaluated, but discarded: */
3138                                        clrvar(arg);
3139                                        continue;
3140                                }
3141                                copyvar(&argvars[i], arg);
3142                                argvars[i].type |= VF_CHILD;
3143                                argvars[i].x.parent = arg;
3144                                i++;
3145                        }
3146
3147                        sv_fnargs = fnargs;
3148                        sv_progname = g_progname;
3149
3150                        fnargs = argvars;
3151                        res = evaluate(op->r.f->body.first, res);
3152                        nvfree(argvars, nargs);
3153
3154                        g_progname = sv_progname;
3155                        fnargs = sv_fnargs;
3156
3157                        break;
3158                }
3159
3160                case XC( OC_GETLINE ):
3161                        debug_printf_eval("GETLINE /\n");
3162                case XC( OC_PGETLINE ):
3163                        debug_printf_eval("PGETLINE\n");
3164                {
3165                        rstream *rsm;
3166                        int i;
3167
3168                        if (op1) {
3169                                rsm = newfile(L.s);
3170                                if (!rsm->F) {
3171                                        /* NB: can't use "opinfo == TI_PGETLINE", would break "cmd" | getline */
3172                                        if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
3173                                                rsm->F = popen(L.s, "r");
3174                                                rsm->is_pipe = TRUE;
3175                                        } else {
3176                                                rsm->F = fopen_for_read(L.s);  /* not xfopen! */
3177                                        }
3178                                }
3179                        } else {
3180                                if (!iF)
3181                                        iF = next_input_file();
3182                                rsm = iF;
3183                        }
3184
3185                        if (!rsm || !rsm->F) {
3186                                setvar_i(intvar[ERRNO], errno);
3187                                setvar_i(res, -1);
3188                                break;
3189                        }
3190
3191                        if (!op->r.n)
3192                                R.v = intvar[F0];
3193
3194                        i = awk_getline(rsm, R.v);
3195                        if (i > 0 && !op1) {
3196                                incvar(intvar[FNR]);
3197                                incvar(intvar[NR]);
3198                        }
3199                        setvar_i(res, i);
3200                        break;
3201                }
3202
3203                /* simple builtins */
3204                case XC( OC_FBLTIN ): {
3205                        double R_d = R_d; /* for compiler */
3206                        debug_printf_eval("FBLTIN\n");
3207
3208                        if (op1 && op1->info == TI_COMMA)
3209                                /* Simple builtins take one arg maximum */
3210                                syntax_error("Too many arguments");
3211
3212                        switch (opn) {
3213                        case F_in:
3214                                R_d = (long long)L_d;
3215                                break;
3216
3217                        case F_rn: /*rand*/
3218                                if (op1)
3219                                        syntax_error("Too many arguments");
3220                        {
3221#if RAND_MAX >= 0x7fffffff
3222                                uint32_t u = ((uint32_t)rand() << 16) ^ rand();
3223                                uint64_t v = ((uint64_t)rand() << 32) | u;
3224                                /* the above shift+or is optimized out on 32-bit arches */
3225# if RAND_MAX > 0x7fffffff
3226                                v &= 0x7fffffffffffffffULL;
3227# endif
3228                                R_d = (double)v / 0x8000000000000000ULL;
3229#else
3230# error Not implemented for this value of RAND_MAX
3231#endif
3232                                break;
3233                        }
3234                        case F_co:
3235                                if (ENABLE_FEATURE_AWK_LIBM) {
3236                                        R_d = cos(L_d);
3237                                        break;
3238                                }
3239
3240                        case F_ex:
3241                                if (ENABLE_FEATURE_AWK_LIBM) {
3242                                        R_d = exp(L_d);
3243                                        break;
3244                                }
3245
3246                        case F_lg:
3247                                if (ENABLE_FEATURE_AWK_LIBM) {
3248                                        R_d = log(L_d);
3249                                        break;
3250                                }
3251
3252                        case F_si:
3253                                if (ENABLE_FEATURE_AWK_LIBM) {
3254                                        R_d = sin(L_d);
3255                                        break;
3256                                }
3257
3258                        case F_sq:
3259                                if (ENABLE_FEATURE_AWK_LIBM) {
3260                                        R_d = sqrt(L_d);
3261                                        break;
3262                                }
3263
3264                                syntax_error(EMSG_NO_MATH);
3265                                break;
3266
3267                        case F_sr:
3268                                R_d = (double)seed;
3269                                seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
3270                                srand(seed);
3271                                break;
3272
3273                        case F_ti: /*systime*/
3274                                if (op1)
3275                                        syntax_error("Too many arguments");
3276                                R_d = time(NULL);
3277                                break;
3278
3279                        case F_le:
3280                                debug_printf_eval("length: L.s:'%s'\n", L.s);
3281                                if (!op1) {
3282                                        L.s = getvar_s(intvar[F0]);
3283                                        debug_printf_eval("length: L.s='%s'\n", L.s);
3284                                }
3285                                else if (L.v->type & VF_ARRAY) {
3286                                        R_d = L.v->x.array->nel;
3287                                        debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
3288                                        break;
3289                                }
3290                                R_d = strlen(L.s);
3291                                break;
3292
3293                        case F_sy:
3294                                fflush_all();
3295                                R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
3296                                                ? (system(L.s) >> 8) : 0;
3297                                break;
3298
3299                        case F_ff:
3300                                if (!op1) {
3301                                        fflush(stdout);
3302                                } else if (L.s && *L.s) {
3303                                        rstream *rsm = newfile(L.s);
3304                                        fflush(rsm->F);
3305                                } else {
3306                                        fflush_all();
3307                                }
3308                                break;
3309
3310                        case F_cl: {
3311                                rstream *rsm;
3312                                int err = 0;
3313                                rsm = (rstream *)hash_search(fdhash, L.s);
3314                                debug_printf_eval("OC_FBLTIN close: op1:%p s:'%s' rsm:%p\n", op1, L.s, rsm);
3315                                if (rsm) {
3316                                        debug_printf_eval("OC_FBLTIN F_cl "
3317                                                "rsm->is_pipe:%d, ->F:%p\n",
3318                                                rsm->is_pipe, rsm->F);
3319                                        /* Can be NULL if open failed. Example:
3320                                         * getline line <"doesnt_exist";
3321                                         * close("doesnt_exist"); <--- here rsm->F is NULL
3322                                         */
3323                                        if (rsm->F)
3324                                                err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
3325//TODO: fix this case:
3326// $ awk 'BEGIN { print close(""); print ERRNO }'
3327// -1
3328// close of redirection that was never opened
3329// (we print 0, 0)
3330                                        free(rsm->buffer);
3331                                        hash_remove(fdhash, L.s);
3332                                }
3333                                if (err)
3334                                        setvar_i(intvar[ERRNO], errno);
3335                                R_d = (double)err;
3336                                break;
3337                        }
3338                        } /* switch */
3339                        setvar_i(res, R_d);
3340                        break;
3341                }
3342
3343                case XC( OC_BUILTIN ):
3344                        debug_printf_eval("BUILTIN\n");
3345                        res = exec_builtin(op, res);
3346                        break;
3347
3348                case XC( OC_SPRINTF ):
3349                        debug_printf_eval("SPRINTF\n");
3350                        setvar_p(res, awk_printf(op1, NULL));
3351                        break;
3352
3353                case XC( OC_UNARY ):
3354                        debug_printf_eval("UNARY\n");
3355                {
3356                        double Ld, R_d;
3357
3358                        Ld = R_d = getvar_i(R.v);
3359                        switch (opn) {
3360                        case 'P':
3361                                Ld = ++R_d;
3362                                goto r_op_change;
3363                        case 'p':
3364                                R_d++;
3365                                goto r_op_change;
3366                        case 'M':
3367                                Ld = --R_d;
3368                                goto r_op_change;
3369                        case 'm':
3370                                R_d--;
3371 r_op_change:
3372                                setvar_i(R.v, R_d);
3373                                break;
3374                        case '!':
3375                                Ld = !istrue(R.v);
3376                                break;
3377                        case '-':
3378                                Ld = -R_d;
3379                                break;
3380                        }
3381                        setvar_i(res, Ld);
3382                        break;
3383                }
3384
3385                case XC( OC_FIELD ):
3386                        debug_printf_eval("FIELD\n");
3387                {
3388                        int i = (int)getvar_i(R.v);
3389                        if (i < 0)
3390                                syntax_error(EMSG_NEGATIVE_FIELD);
3391                        if (i == 0) {
3392                                res = intvar[F0];
3393                        } else {
3394                                split_f0();
3395                                if (i > nfields)
3396                                        fsrealloc(i);
3397                                res = &Fields[i - 1];
3398                        }
3399                        break;
3400                }
3401
3402                /* concatenation (" ") and index joining (",") */
3403                case XC( OC_CONCAT ):
3404                        debug_printf_eval("CONCAT /\n");
3405                case XC( OC_COMMA ): {
3406                        const char *sep = "";
3407                        debug_printf_eval("COMMA\n");
3408                        if (opinfo == TI_COMMA)
3409                                sep = getvar_s(intvar[SUBSEP]);
3410                        setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
3411                        break;
3412                }
3413
3414                case XC( OC_LAND ):
3415                        debug_printf_eval("LAND\n");
3416                        setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
3417                        break;
3418
3419                case XC( OC_LOR ):
3420                        debug_printf_eval("LOR\n");
3421                        setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
3422                        break;
3423
3424                case XC( OC_BINARY ):
3425                        debug_printf_eval("BINARY /\n");
3426                case XC( OC_REPLACE ):
3427                        debug_printf_eval("REPLACE\n");
3428                {
3429                        double R_d = getvar_i(R.v);
3430                        debug_printf_eval("R_d:%f opn:%c\n", R_d, opn);
3431                        switch (opn) {
3432                        case '+':
3433                                L_d += R_d;
3434                                break;
3435                        case '-':
3436                                L_d -= R_d;
3437                                break;
3438                        case '*':
3439                                L_d *= R_d;
3440                                break;
3441                        case '/':
3442                                if (R_d == 0)
3443                                        syntax_error(EMSG_DIV_BY_ZERO);
3444                                L_d /= R_d;
3445                                break;
3446                        case '&':
3447                                if (ENABLE_FEATURE_AWK_LIBM)
3448                                        L_d = pow(L_d, R_d);
3449                                else
3450                                        syntax_error(EMSG_NO_MATH);
3451                                break;
3452                        case '%':
3453                                if (R_d == 0)
3454                                        syntax_error(EMSG_DIV_BY_ZERO);
3455                                L_d -= (long long)(L_d / R_d) * R_d;
3456                                break;
3457                        }
3458                        debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
3459                        res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
3460                        break;
3461                }
3462
3463                case XC( OC_COMPARE ): {
3464                        int i = i; /* for compiler */
3465                        double Ld;
3466                        debug_printf_eval("COMPARE\n");
3467
3468                        if (is_numeric(L.v) && is_numeric(R.v)) {
3469                                Ld = getvar_i(L.v) - getvar_i(R.v);
3470                        } else {
3471                                const char *l = getvar_s(L.v);
3472                                const char *r = getvar_s(R.v);
3473                                Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
3474                        }
3475                        switch (opn & 0xfe) {
3476                        case 0:
3477                                i = (Ld > 0);
3478                                break;
3479                        case 2:
3480                                i = (Ld >= 0);
3481                                break;
3482                        case 4:
3483                                i = (Ld == 0);
3484                                break;
3485                        }
3486                        setvar_i(res, (i == 0) ^ (opn & 1));
3487                        break;
3488                }
3489
3490                default:
3491                        syntax_error(EMSG_POSSIBLE_ERROR);
3492                } /* switch */
3493
3494                if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3495                        op = op->a.n;
3496                if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3497                        break;
3498                if (nextrec)
3499                        break;
3500        } /* while (op) */
3501
3502        nvfree(tmpvars, 2);
3503#undef TMPVAR0
3504#undef TMPVAR1
3505
3506        debug_printf_eval("returning from %s(): %p\n", __func__, res);
3507        return res;
3508#undef fnargs
3509#undef seed
3510#undef sreg
3511}
3512
3513/* -------- main & co. -------- */
3514
3515static int awk_exit(void)
3516{
3517        unsigned i;
3518
3519        if (!exiting) {
3520                exiting = TRUE;
3521                nextrec = FALSE;
3522                evaluate(endseq.first, &G.exit__tmpvar);
3523        }
3524
3525        /* waiting for children */
3526        for (i = 0; i < fdhash->csize; i++) {
3527                hash_item *hi;
3528                hi = fdhash->items[i];
3529                while (hi) {
3530                        if (hi->data.rs.F && hi->data.rs.is_pipe)
3531                                pclose(hi->data.rs.F);
3532                        hi = hi->next;
3533                }
3534        }
3535
3536        exit(G.exitcode);
3537}
3538
3539int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3540int awk_main(int argc UNUSED_PARAM, char **argv)
3541{
3542        unsigned opt;
3543        char *opt_F;
3544        llist_t *list_v = NULL;
3545        llist_t *list_f = NULL;
3546#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3547        llist_t *list_e = NULL;
3548#endif
3549        int i;
3550
3551        INIT_G();
3552
3553        /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3554         * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3555        if (ENABLE_LOCALE_SUPPORT)
3556                setlocale(LC_NUMERIC, "C");
3557
3558        /* initialize variables */
3559        vhash = hash_init();
3560        {
3561                char *vnames = (char *)vNames; /* cheat */
3562                char *vvalues = (char *)vValues;
3563                for (i = 0; *vnames; i++) {
3564                        var *v;
3565                        intvar[i] = v = newvar(nextword(&vnames));
3566                        if (*vvalues != '\377')
3567                                setvar_s(v, nextword(&vvalues));
3568                        else
3569                                setvar_i(v, 0);
3570
3571                        if (*vnames == '*') {
3572                                v->type |= VF_SPECIAL;
3573                                vnames++;
3574                        }
3575                }
3576        }
3577
3578        handle_special(intvar[FS]);
3579        handle_special(intvar[RS]);
3580
3581        /* Huh, people report that sometimes environ is NULL. Oh well. */
3582        if (environ) {
3583                char **envp;
3584                for (envp = environ; *envp; envp++) {
3585                        /* environ is writable, thus we don't strdup it needlessly */
3586                        char *s = *envp;
3587                        char *s1 = strchr(s, '=');
3588                        if (s1) {
3589                                *s1 = '\0';
3590                                /* Both findvar and setvar_u take const char*
3591                                 * as 2nd arg -> environment is not trashed */
3592                                setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3593                                *s1 = '=';
3594                        }
3595                }
3596        }
3597        opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3598        argv += optind;
3599        //argc -= optind;
3600        if (opt & OPT_W)
3601                bb_simple_error_msg("warning: option -W is ignored");
3602        if (opt & OPT_F) {
3603                unescape_string_in_place(opt_F);
3604                setvar_s(intvar[FS], opt_F);
3605        }
3606        while (list_v) {
3607                if (!is_assignment(llist_pop(&list_v)))
3608                        bb_show_usage();
3609        }
3610
3611        /* Parse all supplied programs */
3612        fnhash = hash_init();
3613        ahash = hash_init();
3614        while (list_f) {
3615                int fd;
3616                char *s;
3617
3618                g_progname = llist_pop(&list_f);
3619                fd = xopen_stdin(g_progname);
3620                s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
3621                close(fd);
3622                parse_program(s);
3623                free(s);
3624        }
3625        g_progname = "cmd. line";
3626#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3627        while (list_e) {
3628                parse_program(llist_pop(&list_e));
3629        }
3630#endif
3631//FIXME: preserve order of -e and -f
3632//TODO: implement -i LIBRARY and -E FILE too, they are easy-ish
3633        if (!(opt & (OPT_f | OPT_e))) {
3634                if (!*argv)
3635                        bb_show_usage();
3636                parse_program(*argv++);
3637        }
3638        /* Free unused parse structures */
3639        //hash_free(fnhash); // ~250 bytes when empty, used only for function names
3640        //^^^^^^^^^^^^^^^^^ does not work, hash_clear() inside SEGVs
3641        // (IOW: hash_clear() assumes it's a hash of variables. fnhash is not).
3642        free(fnhash->items);
3643        free(fnhash);
3644        fnhash = NULL; // debug
3645        //hash_free(ahash); // empty after parsing, will reuse as fdhash instead of freeing
3646
3647        /* Parsing done, on to executing */
3648
3649        /* fill in ARGV array */
3650        setari_u(intvar[ARGV], 0, "awk");
3651        i = 0;
3652        while (*argv)
3653                setari_u(intvar[ARGV], ++i, *argv++);
3654        setvar_i(intvar[ARGC], i + 1);
3655
3656        //fdhash = ahash; // done via define
3657        newfile("/dev/stdin")->F = stdin;
3658        newfile("/dev/stdout")->F = stdout;
3659        newfile("/dev/stderr")->F = stderr;
3660
3661        evaluate(beginseq.first, &G.main__tmpvar);
3662        if (!mainseq.first && !endseq.first)
3663                awk_exit();
3664
3665        /* input file could already be opened in BEGIN block */
3666        if (!iF)
3667                iF = next_input_file();
3668
3669        /* passing through input files */
3670        while (iF) {
3671                nextfile = FALSE;
3672                setvar_i(intvar[FNR], 0);
3673
3674                while ((i = awk_getline(iF, intvar[F0])) > 0) {
3675                        nextrec = FALSE;
3676                        incvar(intvar[NR]);
3677                        incvar(intvar[FNR]);
3678                        evaluate(mainseq.first, &G.main__tmpvar);
3679
3680                        if (nextfile)
3681                                break;
3682                }
3683
3684                if (i < 0)
3685                        syntax_error(strerror(errno));
3686
3687                iF = next_input_file();
3688        }
3689
3690        awk_exit();
3691        /*return 0;*/
3692}
3693