busybox/editors/awk.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * awk implementation for busybox
   4 *
   5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
   6 *
   7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
   8 */
   9
  10//config:config AWK
  11//config:       bool "awk"
  12//config:       default y
  13//config:       help
  14//config:         Awk is used as a pattern scanning and processing language. This is
  15//config:         the BusyBox implementation of that programming language.
  16//config:
  17//config:config FEATURE_AWK_LIBM
  18//config:       bool "Enable math functions (requires libm)"
  19//config:       default y
  20//config:       depends on AWK
  21//config:       help
  22//config:         Enable math functions of the Awk programming language.
  23//config:         NOTE: This will require libm to be present for linking.
  24//config:
  25//config:config FEATURE_AWK_GNU_EXTENSIONS
  26//config:       bool "Enable a few GNU extensions"
  27//config:       default y
  28//config:       depends on AWK
  29//config:       help
  30//config:         Enable a few features from gawk:
  31//config:         * command line option -e AWK_PROGRAM
  32//config:         * simultaneous use of -f and -e on the command line.
  33//config:           This enables the use of awk library files.
  34//config:           Ex: awk -f mylib.awk -e '{print myfunction($1);}' ...
  35
  36//applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk))
  37
  38//kbuild:lib-$(CONFIG_AWK) += awk.o
  39
  40//usage:#define awk_trivial_usage
  41//usage:       "[OPTIONS] [AWK_PROGRAM] [FILE]..."
  42//usage:#define awk_full_usage "\n\n"
  43//usage:       "        -v VAR=VAL      Set variable"
  44//usage:     "\n        -F SEP          Use SEP as field separator"
  45//usage:     "\n        -f FILE         Read program from FILE"
  46//usage:        IF_FEATURE_AWK_GNU_EXTENSIONS(
  47//usage:     "\n        -e AWK_PROGRAM"
  48//usage:        )
  49
  50#include "libbb.h"
  51#include "xregex.h"
  52#include <math.h>
  53
  54/* This is a NOEXEC applet. Be very careful! */
  55
  56
  57/* If you comment out one of these below, it will be #defined later
  58 * to perform debug printfs to stderr: */
  59#define debug_printf_walker(...)  do {} while (0)
  60#define debug_printf_eval(...)  do {} while (0)
  61#define debug_printf_parse(...)  do {} while (0)
  62
  63#ifndef debug_printf_walker
  64# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
  65#endif
  66#ifndef debug_printf_eval
  67# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
  68#endif
  69#ifndef debug_printf_parse
  70# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
  71#endif
  72
  73
  74#define OPTSTR_AWK \
  75        "F:v:f:" \
  76        IF_FEATURE_AWK_GNU_EXTENSIONS("e:") \
  77        "W:"
  78#define OPTCOMPLSTR_AWK \
  79        "v::f::" \
  80        IF_FEATURE_AWK_GNU_EXTENSIONS("e::")
  81enum {
  82        OPTBIT_F,       /* define field separator */
  83        OPTBIT_v,       /* define variable */
  84        OPTBIT_f,       /* pull in awk program from file */
  85        IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,) /* -e AWK_PROGRAM */
  86        OPTBIT_W,       /* -W ignored */
  87        OPT_F = 1 << OPTBIT_F,
  88        OPT_v = 1 << OPTBIT_v,
  89        OPT_f = 1 << OPTBIT_f,
  90        OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
  91        OPT_W = 1 << OPTBIT_W
  92};
  93
  94#define MAXVARFMT       240
  95#define MINNVBLOCK      64
  96
  97/* variable flags */
  98#define VF_NUMBER       0x0001  /* 1 = primary type is number */
  99#define VF_ARRAY        0x0002  /* 1 = it's an array */
 100
 101#define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
 102#define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
 103#define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
 104#define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
 105#define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
 106#define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
 107#define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
 108
 109/* these flags are static, don't change them when value is changed */
 110#define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
 111
/* State of one array walk (for..in); reached through var::x.walker.
 * NOTE(review): presumably wbuf holds the element names being walked,
 * cur/end delimit the part not yet visited, and prev chains nested
 * walks -- confirm against the walker code, which is not in this part
 * of the file. */
typedef struct walker_list {
        char *end;
        char *cur;
        struct walker_list *prev;
        char wbuf[1];
} walker_list;
 118
/* Variable.
 * Holds both a numeric and a string representation; the VF_* bits in
 * `type` say which one is primary and whether the other is cached.
 * The meaning of the union `x` depends on the VF_ARRAY / VF_CHILD /
 * VF_WALK flags.
 */
typedef struct var_s {
        unsigned type;            /* flags */
        double number;
        char *string;
        union {
                int aidx;               /* func arg idx (for compilation stage) */
                struct xhash_s *array;  /* array ptr */
                struct var_s *parent;   /* for func args, ptr to actual parameter */
                walker_list *walker;    /* list of array elements (for..in) */
        } x;
} var;
 131
/* Node chain (pattern-action chain, BEGIN, END, function bodies).
 * Tracks the first and last node of the sequence together with the
 * name of the program it belongs to. */
typedef struct chain_s {
        struct node_s *first;
        struct node_s *last;
        const char *programname;
} chain;
 138
/* Function: argument count plus the node chain forming its body.
 * Stored as a value in the functions hash (see hash_item). */
typedef struct func_s {
        unsigned nargs;
        struct chain_s body;
} func;
 144
/* I/O stream.
 * Stored as a value in the redirect-streams hash (see hash_item).
 * NOTE(review): buffer/adv/size/pos look like read-buffer bookkeeping
 * for record input -- confirm against the getline code, which is not
 * in this part of the file. */
typedef struct rstream_s {
        FILE *F;
        char *buffer;
        int adv;
        int size;
        int pos;
        smallint is_pipe;
} rstream;
 154
/* One entry of an xhash bucket chain.
 * `data` MUST stay the first member: hash_search()/hash_find() hand out
 * &item->data and hash_find() converts that pointer straight back to
 * the item.  The key is stored inline after the struct; hash_find()
 * allocates sizeof(hash_item) plus the key length. */
typedef struct hash_item_s {
        union {
                struct var_s v;         /* variable/array hash */
                struct rstream_s rs;    /* redirect streams hash */
                struct func_s f;        /* functions hash */
        } data;
        struct hash_item_s *next;       /* next in chain */
        char name[1];                   /* really it's longer */
} hash_item;
 164
/* Chained hash table keyed by NUL-terminated strings.
 * Starts with FIRST_PRIME buckets and grows through PRIMES[]
 * (see hash_init() and hash_rebuild()). */
typedef struct xhash_s {
        unsigned nel;           /* num of elements */
        unsigned csize;         /* current hash size */
        unsigned nprime;        /* next hash size in PRIMES[] */
        unsigned glen;          /* summary length of item names */
        struct hash_item_s **items;
} xhash;
 172
/* Tree node.
 * `info` packs the operation class, operand flags and priority using
 * the same layout as the tokeninfo[] entries (see OPCLSMASK, OPNMASK,
 * OF_*, P()).  l, r and a are operand slots; which union member is
 * valid depends on the operation. */
typedef struct node_s {
        uint32_t info;
        unsigned lineno;
        union {
                struct node_s *n;
                var *v;
                int aidx;
                char *new_progname;
                regex_t *re;
        } l;
        union {
                struct node_s *n;
                regex_t *ire;
                func *f;
        } r;
        union {
                struct node_s *n;
        } a;
} node;
 193
/* Block of temporary variables.
 * Blocks are doubly linked through prev/next; the variables themselves
 * live in the trailing flexible array nv[].
 * NOTE(review): presumably `size` is the capacity of nv[] and `pos`
 * the next free slot -- confirm against the allocator, which is not in
 * this part of the file. */
typedef struct nvblock_s {
        int size;
        var *pos;
        struct nvblock_s *prev;
        struct nvblock_s *next;
        var nv[];
} nvblock;
 202
/* Splitter: a tree node plus storage for two compiled regexes.
 * Used for the field and record splitters (fsplitter, rsplitter) and
 * for a scratch splitter in struct globals2.
 * NOTE(review): the two regexes presumably back node.l.re and
 * node.r.ire (case-sensitive / case-insensitive) -- confirm. */
typedef struct tsplitter_s {
        node n;
        regex_t re[2];
} tsplitter;
 207
/* simple token classes */
/* Order and hex values are very important!!!  See next_token() */
/* Each class is a single bit.  tokenlist[] lists its tokens class by
 * class in exactly this order, and an NTC marker there moves on to the
 * next class (tc<<1), so the two must be kept in lockstep. */
#define TC_SEQSTART     1                       /* ( */
#define TC_SEQTERM      (1 << 1)                /* ) */
#define TC_REGEXP       (1 << 2)                /* /.../ */
#define TC_OUTRDR       (1 << 3)                /* | > >> */
#define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
#define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
#define TC_BINOPX       (1 << 6)                /* two-opnd operator */
#define TC_IN           (1 << 7)
#define TC_COMMA        (1 << 8)
#define TC_PIPE         (1 << 9)                /* input redirection pipe */
#define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
#define TC_ARRTERM      (1 << 11)               /* ] */
#define TC_GRPSTART     (1 << 12)               /* { */
#define TC_GRPTERM      (1 << 13)               /* } */
#define TC_SEMICOL      (1 << 14)
#define TC_NEWLINE      (1 << 15)
#define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
#define TC_WHILE        (1 << 17)
#define TC_ELSE         (1 << 18)
#define TC_BUILTIN      (1 << 19)
#define TC_GETLINE      (1 << 20)
#define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
#define TC_BEGIN        (1 << 22)
#define TC_END          (1 << 23)
#define TC_EOF          (1 << 24)
#define TC_VARIABLE     (1 << 25)
#define TC_ARRAY        (1 << 26)
#define TC_FUNCTION     (1 << 27)
#define TC_STRING       (1 << 28)
#define TC_NUMBER       (1 << 29)

/* any unary prefix operator ("++ -- $" and "+ - !" live in separate classes) */
#define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
//#define       TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
                   | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT (TC_STATX | TC_WHILE)
#define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
                   | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
                   | TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
                   | TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
 270
/* Operand flags, stored in bits 16..21 of a token/node info word.
 * NOTE(review): going by the names and the builtin comment further
 * down, RESn presumably means "resolve operand n", STRn/NUM1 "fetch it
 * as string/number" -- confirm in evaluate(). */
#define OF_RES1    0x010000
#define OF_RES2    0x020000
#define OF_STR1    0x040000
#define OF_STR2    0x080000
#define OF_NUM1    0x100000
#define OF_CHECKED 0x200000

/* combined operator flags */
/* two-letter shorthand: first letter is for operand 1, second for
 * operand 2; x = none, V = OF_RESn, S = OF_RESn|OF_STRn,
 * N = OF_RES1|OF_NUM1 */
#define xx      0
#define xV      OF_RES2
#define xS      (OF_RES2 | OF_STR2)
#define Vx      OF_RES1
#define VV      (OF_RES1 | OF_RES2)
#define Nx      (OF_RES1 | OF_NUM1)
#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx      (OF_RES1 | OF_STR1)
#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

/* masks selecting the operation class (OC_xxx / ST_xxx) and the
 * low operator-specific bits of an info word */
#define OPCLSMASK 0xFF00
#define OPNMASK   0x007F
 292
 293/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 294 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 295 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 296 */
 297#undef P
 298#undef PRIMASK
 299#undef PRIMASK2
 300#define P(x)      (x << 24)
 301#define PRIMASK   0x7F000000
 302#define PRIMASK2  0x7E000000
 303
/* Operation classes */
/* Each class occupies the OPCLSMASK byte (bits 8..15) of an info word.
 * The numeric order matters: the two thresholds below are compared
 * against the class value.
 * NOTE(review): SHIFT_TIL_THIS equals OC_WALKINIT and RECUR_FROM_THIS
 * equals OC_BINARY; how evaluate() uses them is not visible in this
 * part of the file -- confirm there before renumbering. */

#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000

enum {
        OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
        OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,

        OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
        OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
        OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,

        OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
        OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
        OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
        OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
        OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
        OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
        OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
        OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
        OC_DONE = 0x2800,

        /* statement keywords; these are parse-time markers in tokeninfo[] */
        ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
        ST_WHILE = 0x3300
};
 330
 331/* simple builtins */
 332enum {
 333        F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
 334        F_ti,   F_le,   F_sy,   F_ff,   F_cl
 335};
 336
 337/* builtins */
 338enum {
 339        B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
 340        B_ge,   B_gs,   B_su,
 341        B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
 342};
 343
/* tokens and their corresponding info values */
/* tokenlist[] is a packed table: every token is one length byte
 * (written as an octal escape, e.g. "\10" = 8) followed by the token
 * text.  Tokens are grouped by token class in TC_xxx bit order; an NTC
 * byte closes the current class.  The Nth token here is described by
 * the Nth entry of tokeninfo[], so both tables must be edited together.
 */

#define NTC     "\377"  /* switch to next token class (tc<<1) */
#define NTCC    '\377'

#define OC_B  OC_BUILTIN

static const char tokenlist[] ALIGN1 =
        "\1("         NTC
        "\1)"         NTC
        "\1/"         NTC                                   /* REGEXP */
        "\2>>"        "\1>"         "\1|"       NTC         /* OUTRDR */
        "\2++"        "\2--"        NTC                     /* UOPPOST */
        "\2++"        "\2--"        "\1$"       NTC         /* UOPPRE1 */
        "\2=="        "\1="         "\2+="      "\2-="      /* BINOPX */
        "\2*="        "\2/="        "\2%="      "\2^="
        "\1+"         "\1-"         "\3**="     "\2**"
        "\1/"         "\1%"         "\1^"       "\1*"
        "\2!="        "\2>="        "\2<="      "\1>"
        "\1<"         "\2!~"        "\1~"       "\2&&"
        "\2||"        "\1?"         "\1:"       NTC
        "\2in"        NTC
        "\1,"         NTC
        "\1|"         NTC
        "\1+"         "\1-"         "\1!"       NTC         /* UOPPRE2 */
        "\1]"         NTC
        "\1{"         NTC
        "\1}"         NTC
        "\1;"         NTC
        "\1\n"        NTC
        "\2if"        "\2do"        "\3for"     "\5break"   /* STATX */
        "\10continue" "\6delete"    "\5print"
        "\6printf"    "\4next"      "\10nextfile"
        "\6return"    "\4exit"      NTC
        "\5while"     NTC
        "\4else"      NTC

        "\3and"       "\5compl"     "\6lshift"  "\2or"
        "\6rshift"    "\3xor"
        "\5close"     "\6system"    "\6fflush"  "\5atan2"   /* BUILTIN */
        "\3cos"       "\3exp"       "\3int"     "\3log"
        "\4rand"      "\3sin"       "\4sqrt"    "\5srand"
        "\6gensub"    "\4gsub"      "\5index"   "\6length"
        "\5match"     "\5split"     "\7sprintf" "\3sub"
        "\6substr"    "\7systime"   "\10strftime" "\6mktime"
        "\7tolower"   "\7toupper"   NTC
        "\7getline"   NTC
        "\4func"      "\10function" NTC
        "\5BEGIN"     NTC
        "\3END"
        /* compiler adds trailing "\0" */
        ;
 396
/* Info word for each token of tokenlist[], in the same order (NTC
 * markers have no entry).  Layout of one word:
 *   bits 24..30  P(): operator priority, or the builtin argument spec
 *   bits 16..21  OF_xxx operand flags (xV, NV, Sx, ...)
 *   bits  8..15  operation class (OC_xxx / ST_xxx, see OPCLSMASK)
 *   low bits     operator-specific value: a character such as '+',
 *                a comparison code, or an F_xx / B_xx builtin id
 * Tokens that need no extra information carry 0.
 */
static const uint32_t tokeninfo[] = {
        0,
        0,
        OC_REGEXP,
        xS|'a',                  xS|'w',                  xS|'|',
        OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
        OC_UNARY|xV|P(9)|'P',    OC_UNARY|xV|P(9)|'M',    OC_FIELD|xV|P(5),
        OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
        OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
        OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
        OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
        OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
        OC_COMPARE|VV|P(39)|2,   OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
        OC_LOR|Vx|P(59),         OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
        OC_IN|SV|P(49), /* in */
        OC_COMMA|SS|P(80),
        OC_PGETLINE|SV|P(37),
        OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
        0, /* ] */
        0,
        0,
        0,
        0, /* \n */
        ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
        OC_CONTINUE,  OC_DELETE|Vx, OC_PRINT,
        OC_PRINTF,    OC_NEXT,      OC_NEXTFILE,
        OC_RETURN|Vx, OC_EXIT|Nx,
        ST_WHILE,
        0, /* else */

        OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
        OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
        OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
        OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
        OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
        OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
        OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
        OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
        OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
        OC_GETLINE|SV|P(0),
        0,                 0,
        0,
        0 /* END */
};
 441
/* internal variable names and their initial values       */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* The enum gives each internal variable its index into intvar[].
 * vNames and vValues are packed, NUL-separated string lists in the
 * same order as the enum; in vNames the '*' is written in front of
 * the name it marks.
 * NOTE(review): vValues covers only the first ten variables and ends
 * with a "\377" byte, presumably a marker telling the initialization
 * code to switch to another default for the rest -- confirm where the
 * tables are consumed.
 */
enum {
        CONVFMT,    OFMT,       FS,         OFS,
        ORS,        RS,         RT,         FILENAME,
        SUBSEP,     F0,         ARGIND,     ARGC,
        ARGV,       ERRNO,      FNR,        NR,
        NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
};

static const char vNames[] ALIGN1 =
        "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
        "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
        "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
        "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
        "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";

static const char vValues[] ALIGN1 =
        "%.6g\0"    "%.6g\0"    " \0"       " \0"
        "\n\0"      "\n\0"      "\0"        "\0"
        "\034\0"    "\0"        "\377";
 463
/* hash size may grow to these values */
/* A new hash starts with FIRST_PRIME buckets (hash_init); each
 * hash_rebuild() moves on to the next entry of PRIMES[] and stops
 * growing once the list is exhausted. */
#define FIRST_PRIME 61
static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
 467
 468
/* Globals. Split in two parts so that first one is addressed
 * with (mostly short) negative offsets.
 * NB: it's unsafe to put members of type "double"
 * into globals2 (gcc may fail to align them).
 *
 * This first part is placed directly in front of globals2 in a single
 * allocation and is reached as ptr_to_globals[-1] (see G1, INIT_G).
 */
struct globals {
        double t_double;
        chain beginseq, mainseq, endseq;
        chain *seq;
        node *break_ptr, *continue_ptr;
        rstream *iF;
        xhash *vhash, *ahash, *fdhash, *fnhash; /* vhash: variables, fdhash: streams, fnhash: functions */
        const char *g_progname; /* reported in syntax_error() messages */
        int g_lineno;           /* reported in syntax_error() messages */
        int nfields;
        int maxfields; /* used in fsrealloc() only */
        var *Fields;
        nvblock *g_cb;
        char *g_pos;
        char *g_buf;            /* scratch buffer; getvar_s() formats numbers into it */
        smallint icase;
        smallint exiting;
        smallint nextrec;
        smallint nextfile;
        smallint is_f0_split;
        smallint t_rollback;
};
/* Second part of the globals: ptr_to_globals points at this struct
 * (see G).  Members named func__var are former function-local
 * statics of `func`. */
struct globals2 {
        uint32_t t_info; /* often used */
        uint32_t t_tclass;
        char *t_string;
        int t_lineno;           /* bumped by skip_spaces() on backslash-newline */

        var *intvar[NUM_INTERNAL_VARS]; /* often used */

        /* former statics from various functions */
        char *split_f0__fstrings;

        uint32_t next_token__save_tclass;
        uint32_t next_token__save_info;
        uint32_t next_token__ltclass;   /* INIT_G() starts it at TC_OPTERM */
        smallint next_token__concat_inserted;

        smallint next_input_file__files_happen;
        rstream next_input_file__rsm;

        var *evaluate__fnargs;
        unsigned evaluate__seed;        /* INIT_G() starts it at 1 */
        regex_t evaluate__sreg;

        var ptest__v;

        tsplitter exec_builtin__tspl;

        /* biggest and least used members go last */
        tsplitter fsplitter, rsplitter;
};
/* G1 is the struct globals sitting immediately before the address held
 * in ptr_to_globals; G is the struct globals2 located at that address. */
#define G1 (ptr_to_globals[-1])
#define G (*(struct globals2 *)ptr_to_globals)
/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
/*char G1size[sizeof(G1)]; - 0x74 */
/*char Gsize[sizeof(G)]; - 0x1c4 */
/* Trying to keep most of members accessible with short offsets: */
/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Shorthand so that the rest of the file can use the bare member names */
#define t_double     (G1.t_double    )
#define beginseq     (G1.beginseq    )
#define mainseq      (G1.mainseq     )
#define endseq       (G1.endseq      )
#define seq          (G1.seq         )
#define break_ptr    (G1.break_ptr   )
#define continue_ptr (G1.continue_ptr)
#define iF           (G1.iF          )
#define vhash        (G1.vhash       )
#define ahash        (G1.ahash       )
#define fdhash       (G1.fdhash      )
#define fnhash       (G1.fnhash      )
#define g_progname   (G1.g_progname  )
#define g_lineno     (G1.g_lineno    )
#define nfields      (G1.nfields     )
#define maxfields    (G1.maxfields   )
#define Fields       (G1.Fields      )
#define g_cb         (G1.g_cb        )
#define g_pos        (G1.g_pos       )
#define g_buf        (G1.g_buf       )
#define icase        (G1.icase       )
#define exiting      (G1.exiting     )
#define nextrec      (G1.nextrec     )
#define nextfile     (G1.nextfile    )
#define is_f0_split  (G1.is_f0_split )
#define t_rollback   (G1.t_rollback  )
#define t_info       (G.t_info      )
#define t_tclass     (G.t_tclass    )
#define t_string     (G.t_string    )
#define t_lineno     (G.t_lineno    )
#define intvar       (G.intvar      )
#define fsplitter    (G.fsplitter   )
#define rsplitter    (G.rsplitter   )
/* Allocate both global structs as one zero-filled chunk and point
 * ptr_to_globals just past the first one, so that G1 ends up at [-1].
 * Everything starts out zero except the two members set explicitly. */
#define INIT_G() do { \
        SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
        G.next_token__ltclass = TC_OPTERM; \
        G.evaluate__seed = 1; \
} while (0)
 571
 572
/* function prototypes */
/* Forward declarations for functions that are used before their
 * definitions appear further down in the file. */
static void handle_special(var *);
static node *parse_expr(uint32_t);
static void chain_group(void);
static var *evaluate(node *, var *);
static rstream *next_input_file(void);
static int fmt_num(char *, int, const char *, double, int);
static int awk_exit(int) NORETURN;
 581
 582/* ---- error handling ---- */
 583
/* Message texts for fatal errors; syntax_error() prints one of them
 * prefixed with the program name and line number. */
static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
 594
 595static void zero_out_var(var *vp)
 596{
 597        memset(vp, 0, sizeof(*vp));
 598}
 599
/* Report a fatal error as "<program name>:<line>: <message>", using
 * the current g_progname and g_lineno, and terminate.  Never returns. */
static void syntax_error(const char *message) NORETURN;
static void syntax_error(const char *message)
{
        bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
}
 605
 606/* ---- hash stuff ---- */
 607
 608static unsigned hashidx(const char *name)
 609{
 610        unsigned idx = 0;
 611
 612        while (*name)
 613                idx = *name++ + (idx << 6) - idx;
 614        return idx;
 615}
 616
 617/* create new hash */
 618static xhash *hash_init(void)
 619{
 620        xhash *newhash;
 621
 622        newhash = xzalloc(sizeof(*newhash));
 623        newhash->csize = FIRST_PRIME;
 624        newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
 625
 626        return newhash;
 627}
 628
 629/* find item in hash, return ptr to data, NULL if not found */
 630static void *hash_search(xhash *hash, const char *name)
 631{
 632        hash_item *hi;
 633
 634        hi = hash->items[hashidx(name) % hash->csize];
 635        while (hi) {
 636                if (strcmp(hi->name, name) == 0)
 637                        return &hi->data;
 638                hi = hi->next;
 639        }
 640        return NULL;
 641}
 642
 643/* grow hash if it becomes too big */
 644static void hash_rebuild(xhash *hash)
 645{
 646        unsigned newsize, i, idx;
 647        hash_item **newitems, *hi, *thi;
 648
 649        if (hash->nprime == ARRAY_SIZE(PRIMES))
 650                return;
 651
 652        newsize = PRIMES[hash->nprime++];
 653        newitems = xzalloc(newsize * sizeof(newitems[0]));
 654
 655        for (i = 0; i < hash->csize; i++) {
 656                hi = hash->items[i];
 657                while (hi) {
 658                        thi = hi;
 659                        hi = thi->next;
 660                        idx = hashidx(thi->name) % newsize;
 661                        thi->next = newitems[idx];
 662                        newitems[idx] = thi;
 663                }
 664        }
 665
 666        free(hash->items);
 667        hash->csize = newsize;
 668        hash->items = newitems;
 669}
 670
 671/* find item in hash, add it if necessary. Return ptr to data */
 672static void *hash_find(xhash *hash, const char *name)
 673{
 674        hash_item *hi;
 675        unsigned idx;
 676        int l;
 677
 678        hi = hash_search(hash, name);
 679        if (!hi) {
 680                if (++hash->nel / hash->csize > 10)
 681                        hash_rebuild(hash);
 682
 683                l = strlen(name) + 1;
 684                hi = xzalloc(sizeof(*hi) + l);
 685                strcpy(hi->name, name);
 686
 687                idx = hashidx(name) % hash->csize;
 688                hi->next = hash->items[idx];
 689                hash->items[idx] = hi;
 690                hash->glen += l;
 691        }
 692        return &hi->data;
 693}
 694
/* Typed find-or-create wrappers around hash_find(): findvar works on
 * any hash, the others on the global variable, stream and function
 * hashes respectively. */
#define findvar(hash, name) ((var*)    hash_find((hash), (name)))
#define newvar(name)        ((var*)    hash_find(vhash, (name)))
#define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
#define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
 699
 700static void hash_remove(xhash *hash, const char *name)
 701{
 702        hash_item *hi, **phi;
 703
 704        phi = &hash->items[hashidx(name) % hash->csize];
 705        while (*phi) {
 706                hi = *phi;
 707                if (strcmp(hi->name, name) == 0) {
 708                        hash->glen -= (strlen(name) + 1);
 709                        hash->nel--;
 710                        *phi = hi->next;
 711                        free(hi);
 712                        break;
 713                }
 714                phi = &hi->next;
 715        }
 716}
 717
 718/* ------ some useful functions ------ */
 719
 720static char *skip_spaces(char *p)
 721{
 722        while (1) {
 723                if (*p == '\\' && p[1] == '\n') {
 724                        p++;
 725                        t_lineno++;
 726                } else if (*p != ' ' && *p != '\t') {
 727                        break;
 728                }
 729                p++;
 730        }
 731        return p;
 732}
 733
 734/* returns old *s, advances *s past word and terminating NUL */
 735static char *nextword(char **s)
 736{
 737        char *p = *s;
 738        while (*(*s)++ != '\0')
 739                continue;
 740        return p;
 741}
 742
 743static char nextchar(char **s)
 744{
 745        char c, *pps;
 746
 747        c = *(*s)++;
 748        pps = *s;
 749        if (c == '\\')
 750                c = bb_process_escape_sequence((const char**)s);
 751        /* Example awk statement:
 752         * s = "abc\"def"
 753         * we must treat \" as "
 754         */
 755        if (c == '\\' && *s == pps) { /* unrecognized \z? */
 756                c = *(*s); /* yes, fetch z */
 757                if (c)
 758                        (*s)++; /* advance unless z = NUL */
 759        }
 760        return c;
 761}
 762
 763/* TODO: merge with strcpy_and_process_escape_sequences()?
 764 */
 765static void unescape_string_in_place(char *s1)
 766{
 767        char *s = s1;
 768        while ((*s1 = nextchar(&s)) != '\0')
 769                s1++;
 770}
 771
 772static ALWAYS_INLINE int isalnum_(int c)
 773{
 774        return (isalnum(c) || c == '_');
 775}
 776
 777static double my_strtod(char **pp)
 778{
 779        char *cp = *pp;
 780        if (ENABLE_DESKTOP && cp[0] == '0') {
 781                /* Might be hex or octal integer: 0x123abc or 07777 */
 782                char c = (cp[1] | 0x20);
 783                if (c == 'x' || isdigit(cp[1])) {
 784                        unsigned long long ull = strtoull(cp, pp, 0);
 785                        if (c == 'x')
 786                                return ull;
 787                        c = **pp;
 788                        if (!isdigit(c) && c != '.')
 789                                return ull;
 790                        /* else: it may be a floating number. Examples:
 791                         * 009.123 (*pp points to '9')
 792                         * 000.123 (*pp points to '.')
 793                         * fall through to strtod.
 794                         */
 795                }
 796        }
 797        return strtod(cp, pp);
 798}
 799
 800/* -------- working with variables (set/get/copy/etc) -------- */
 801
 802static xhash *iamarray(var *v)
 803{
 804        var *a = v;
 805
 806        while (a->type & VF_CHILD)
 807                a = a->x.parent;
 808
 809        if (!(a->type & VF_ARRAY)) {
 810                a->type |= VF_ARRAY;
 811                a->x.array = hash_init();
 812        }
 813        return a->x.array;
 814}
 815
 816static void clear_array(xhash *array)
 817{
 818        unsigned i;
 819        hash_item *hi, *thi;
 820
 821        for (i = 0; i < array->csize; i++) {
 822                hi = array->items[i];
 823                while (hi) {
 824                        thi = hi;
 825                        hi = hi->next;
 826                        free(thi->data.v.string);
 827                        free(thi);
 828                }
 829                array->items[i] = NULL;
 830        }
 831        array->glen = array->nel = 0;
 832}
 833
 834/* clear a variable */
 835static var *clrvar(var *v)
 836{
 837        if (!(v->type & VF_FSTR))
 838                free(v->string);
 839
 840        v->type &= VF_DONTTOUCH;
 841        v->type |= VF_DIRTY;
 842        v->string = NULL;
 843        return v;
 844}
 845
 846/* assign string value to variable */
 847static var *setvar_p(var *v, char *value)
 848{
 849        clrvar(v);
 850        v->string = value;
 851        handle_special(v);
 852        return v;
 853}
 854
 855/* same as setvar_p but make a copy of string */
 856static var *setvar_s(var *v, const char *value)
 857{
 858        return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
 859}
 860
 861/* same as setvar_s but sets USER flag */
 862static var *setvar_u(var *v, const char *value)
 863{
 864        v = setvar_s(v, value);
 865        v->type |= VF_USER;
 866        return v;
 867}
 868
 869/* set array element to user string */
 870static void setari_u(var *a, int idx, const char *s)
 871{
 872        var *v;
 873
 874        v = findvar(iamarray(a), itoa(idx));
 875        setvar_u(v, s);
 876}
 877
 878/* assign numeric value to variable */
 879static var *setvar_i(var *v, double value)
 880{
 881        clrvar(v);
 882        v->type |= VF_NUMBER;
 883        v->number = value;
 884        handle_special(v);
 885        return v;
 886}
 887
 888static const char *getvar_s(var *v)
 889{
 890        /* if v is numeric and has no cached string, convert it to string */
 891        if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
 892                fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
 893                v->string = xstrdup(g_buf);
 894                v->type |= VF_CACHED;
 895        }
 896        return (v->string == NULL) ? "" : v->string;
 897}
 898
 899static double getvar_i(var *v)
 900{
 901        char *s;
 902
 903        if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
 904                v->number = 0;
 905                s = v->string;
 906                if (s && *s) {
 907                        debug_printf_eval("getvar_i: '%s'->", s);
 908                        v->number = my_strtod(&s);
 909                        debug_printf_eval("%f (s:'%s')\n", v->number, s);
 910                        if (v->type & VF_USER) {
 911                                s = skip_spaces(s);
 912                                if (*s != '\0')
 913                                        v->type &= ~VF_USER;
 914                        }
 915                } else {
 916                        debug_printf_eval("getvar_i: '%s'->zero\n", s);
 917                        v->type &= ~VF_USER;
 918                }
 919                v->type |= VF_CACHED;
 920        }
 921        debug_printf_eval("getvar_i: %f\n", v->number);
 922        return v->number;
 923}
 924
 925/* Used for operands of bitwise ops */
 926static unsigned long getvar_i_int(var *v)
 927{
 928        double d = getvar_i(v);
 929
 930        /* Casting doubles to longs is undefined for values outside
 931         * of target type range. Try to widen it as much as possible */
 932        if (d >= 0)
 933                return (unsigned long)d;
 934        /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
 935        return - (long) (unsigned long) (-d);
 936}
 937
 938static var *copyvar(var *dest, const var *src)
 939{
 940        if (dest != src) {
 941                clrvar(dest);
 942                dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
 943                debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
 944                dest->number = src->number;
 945                if (src->string)
 946                        dest->string = xstrdup(src->string);
 947        }
 948        handle_special(dest);
 949        return dest;
 950}
 951
 952static var *incvar(var *v)
 953{
 954        return setvar_i(v, getvar_i(v) + 1.0);
 955}
 956
 957/* return true if v is number or numeric string */
 958static int is_numeric(var *v)
 959{
 960        getvar_i(v);
 961        return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
 962}
 963
 964/* return 1 when value of v corresponds to true, 0 otherwise */
 965static int istrue(var *v)
 966{
 967        if (is_numeric(v))
 968                return (v->number != 0);
 969        return (v->string && v->string[0]);
 970}
 971
 972/* temporary variables allocator. Last allocated should be first freed */
 973static var *nvalloc(int n)
 974{
 975        nvblock *pb = NULL;
 976        var *v, *r;
 977        int size;
 978
 979        while (g_cb) {
 980                pb = g_cb;
 981                if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
 982                        break;
 983                g_cb = g_cb->next;
 984        }
 985
 986        if (!g_cb) {
 987                size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
 988                g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
 989                g_cb->size = size;
 990                g_cb->pos = g_cb->nv;
 991                g_cb->prev = pb;
 992                /*g_cb->next = NULL; - xzalloc did it */
 993                if (pb)
 994                        pb->next = g_cb;
 995        }
 996
 997        v = r = g_cb->pos;
 998        g_cb->pos += n;
 999
1000        while (v < g_cb->pos) {
1001                v->type = 0;
1002                v->string = NULL;
1003                v++;
1004        }
1005
1006        return r;
1007}
1008
1009static void nvfree(var *v)
1010{
1011        var *p;
1012
1013        if (v < g_cb->nv || v >= g_cb->pos)
1014                syntax_error(EMSG_INTERNAL_ERROR);
1015
1016        for (p = v; p < g_cb->pos; p++) {
1017                if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1018                        clear_array(iamarray(p));
1019                        free(p->x.array->items);
1020                        free(p->x.array);
1021                }
1022                if (p->type & VF_WALK) {
1023                        walker_list *n;
1024                        walker_list *w = p->x.walker;
1025                        debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1026                        p->x.walker = NULL;
1027                        while (w) {
1028                                n = w->prev;
1029                                debug_printf_walker(" free(%p)\n", w);
1030                                free(w);
1031                                w = n;
1032                        }
1033                }
1034                clrvar(p);
1035        }
1036
1037        g_cb->pos = v;
1038        while (g_cb->prev && g_cb->pos == g_cb->nv) {
1039                g_cb = g_cb->prev;
1040        }
1041}
1042
1043/* ------- awk program text parsing ------- */
1044
1045/* Parse the next token found at global g_pos and place the results into
1046 * the t_* globals (t_tclass, t_info, t_string, t_double).
     * 'expected' is a bitmask of the token classes acceptable at this point.
     * If the token's class is not among them, give up with a syntax error.
     * Returns the token class.
     * Note: strings, regexps and names are unescaped/terminated in place,
     * i.e. the program text buffer is modified.
1047 */
1048static uint32_t next_token(uint32_t expected)
1049{
    /* State kept between calls lives in the G structure */
1050#define concat_inserted (G.next_token__concat_inserted)
1051#define save_tclass     (G.next_token__save_tclass)
1052#define save_info       (G.next_token__save_info)
1053/* Initialized to TC_OPTERM: */
1054#define ltclass         (G.next_token__ltclass)
1055
1056        char *p, *s;
1057        const char *tl;
1058        uint32_t tc;
1059        const uint32_t *ti;
1060
1061        if (t_rollback) {
                /* rollback_token() was called: report the previous token
                 * again; the t_* globals are left as they are */
1062                t_rollback = FALSE;
1063
1064        } else if (concat_inserted) {
                /* the previous call returned a synthesized concatenation
                 * operator; now deliver the real token saved at that time */
1065                concat_inserted = FALSE;
1066                t_tclass = save_tclass;
1067                t_info = save_info;
1068
1069        } else {
1070                p = g_pos;
1071 readnext:
1072                p = skip_spaces(p);
1073                g_lineno = t_lineno;
                /* a comment runs up to (not including) the end of line */
1074                if (*p == '#')
1075                        while (*p != '\n' && *p != '\0')
1076                                p++;
1077
1078                if (*p == '\n')
1079                        t_lineno++;
1080
1081                if (*p == '\0') {
1082                        tc = TC_EOF;
1083                        debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1084
1085                } else if (*p == '\"') {
1086                        /* it's a string */
1087                        t_string = s = ++p;
1088                        while (*p != '\"') {
1089                                char *pp;
1090                                if (*p == '\0' || *p == '\n')
1091                                        syntax_error(EMSG_UNEXP_EOS);
                                /* store the next character at s, in place;
                                 * nextchar() advances pp (presumably past a
                                 * whole escape sequence - see nextchar) */
1092                                pp = p;
1093                                *s++ = nextchar(&pp);
1094                                p = pp;
1095                        }
1096                        p++;
1097                        *s = '\0';
1098                        tc = TC_STRING;
1099                        debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1100
1101                } else if ((expected & TC_REGEXP) && *p == '/') {
1102                        /* it's regexp */
                        /* ('/' is taken as a regexp start only where the
                         * caller says a regexp is acceptable) */
1103                        t_string = s = ++p;
1104                        while (*p != '/') {
1105                                if (*p == '\0' || *p == '\n')
1106                                        syntax_error(EMSG_UNEXP_EOS);
1107                                *s = *p++;
1108                                if (*s++ == '\\') {
                                        /* backslash: the byte just stored is
                                         * replaced by the result of
                                         * bb_process_escape_sequence() */
1109                                        char *pp = p;
1110                                        s[-1] = bb_process_escape_sequence((const char **)&pp);
                                        /* backslash followed by backslash:
                                         * store a second backslash as well */
1111                                        if (*p == '\\')
1112                                                *s++ = '\\';
                                        /* pp not advanced: copy the character
                                         * after the backslash verbatim;
                                         * otherwise skip what was consumed */
1113                                        if (pp == p)
1114                                                *s++ = *p++;
1115                                        else
1116                                                p = pp;
1117                                }
1118                        }
1119                        p++;
1120                        *s = '\0';
1121                        tc = TC_REGEXP;
1122                        debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1123
1124                } else if (*p == '.' || isdigit(*p)) {
1125                        /* it's a number */
1126                        char *pp = p;
1127                        t_double = my_strtod(&pp);
1128                        p = pp;
                        /* a '.' directly after the parsed number is an error */
1129                        if (*p == '.')
1130                                syntax_error(EMSG_UNEXP_TOKEN);
1131                        tc = TC_NUMBER;
1132                        debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1133
1134                } else {
1135                        /* search for something known */
                        /* tokenlist is walked entry by entry: each entry is
                         * a length byte followed by that many characters of
                         * token text; an NTCC length byte means "next token
                         * class" (tc is shifted to the next bit).
                         * ti walks tokeninfo in step with the text entries. */
1136                        tl = tokenlist;
1137                        tc = 0x00000001;
1138                        ti = tokeninfo;
1139                        while (*tl) {
1140                                int l = (unsigned char) *tl++;
1141                                if (l == (unsigned char) NTCC) {
1142                                        tc <<= 1;
1143                                        continue;
1144                                }
1145                                /* if token class is expected,
1146                                 * token matches,
1147                                 * and it's not a longer word,
1148                                 */
1149                                if ((tc & (expected | TC_WORD | TC_NEWLINE))
1150                                 && strncmp(p, tl, l) == 0
1151                                 && !((tc & TC_WORD) && isalnum_(p[l]))
1152                                ) {
1153                                        /* then this is what we are looking for */
1154                                        t_info = *ti;
1155                                        debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1156                                        p += l;
1157                                        goto token_found;
1158                                }
1159                                ti++;
1160                                tl += l;
1161                        }
1162                        /* not a known token */
1163
1164                        /* is it a name? (var/array/function) */
1165                        if (!isalnum_(*p))
1166                                syntax_error(EMSG_UNEXP_TOKEN); /* no */
1167                        /* yes */
                        /* Move the name one byte to the left (overwriting the
                         * byte that precedes it) so that the terminating NUL
                         * can be stored in the name's last original position
                         * without destroying the character that follows. */
1168                        t_string = --p;
1169                        while (isalnum_(*++p)) {
1170                                p[-1] = *p;
1171                        }
1172                        p[-1] = '\0';
1173                        tc = TC_VARIABLE;
1174                        /* also consume whitespace between functionname and bracket */
1175                        if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1176                                p = skip_spaces(p);
                        /* note: '(' is left unconsumed, '[' is consumed */
1177                        if (*p == '(') {
1178                                tc = TC_FUNCTION;
1179                                debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1180                        } else {
1181                                if (*p == '[') {
1182                                        p++;
1183                                        tc = TC_ARRAY;
1184                                        debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1185                                } else
1186                                        debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1187                        }
1188                }
1189 token_found:
1190                g_pos = p;
1191
1192                /* skipping newlines in some cases */
                /* (when the previously returned token has a TC_NOTERM class) */
1193                if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1194                        goto readnext;
1195
1196                /* insert concatenation operator when needed */
                /* The token just read is saved and handed out by the next
                 * call; this call returns the synthesized operator instead. */
1197                if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1198                        concat_inserted = TRUE;
1199                        save_tclass = tc;
1200                        save_info = t_info;
1201                        tc = TC_BINOP;
1202                        t_info = OC_CONCAT | SS | P(35);
1203                }
1204
1205                t_tclass = tc;
1206        }
        /* remember the class being returned; it is consulted on the next call */
1207        ltclass = t_tclass;
1208
1209        /* Are we ready for this? */
1210        if (!(ltclass & expected))
1211                syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1212                                EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1213
1214        return ltclass;
1215#undef concat_inserted
1216#undef save_tclass
1217#undef save_info
1218#undef ltclass
1219}
1220
1221static void rollback_token(void)
1222{
1223        t_rollback = TRUE;
1224}
1225
1226static node *new_node(uint32_t info)
1227{
1228        node *n;
1229
1230        n = xzalloc(sizeof(node));
1231        n->info = info;
1232        n->lineno = g_lineno;
1233        return n;
1234}
1235
1236static void mk_re_node(const char *s, node *n, regex_t *re)
1237{
1238        n->info = OC_REGEXP;
1239        n->l.re = re;
1240        n->r.ire = re + 1;
1241        xregcomp(re, s, REG_EXTENDED);
1242        xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1243}
1244
1245static node *condition(void)
1246{
1247        next_token(TC_SEQSTART);
1248        return parse_expr(TC_SEQTERM);
1249}
1250
1251/* Parse an expression terminated by a token whose class is in iexp and
1252 * return a pointer to the built subtree (NULL if the expression is empty).
     * The terminator is eaten by parse_expr.
     *
     * Locals:
     *  sn    - dummy root on the stack; the finished tree hangs off sn.r.n
     *  cn    - node created most recently ("current node")
     *  xtc   - token classes acceptable for the next token
     *  glptr - getline node that may still receive a '<' input redirection
     * In every node, a.n points to the node it is attached under. */
1253static node *parse_expr(uint32_t iexp)
1254{
1255        node sn;
1256        node *cn = &sn;
1257        node *vn, *glptr;
1258        uint32_t tc, xtc;
1259        var *v;
1260
1261        debug_printf_parse("%s(%x)\n", __func__, iexp);
1262
        /* NOTE(review): root gets info PRIMASK, presumably so that the
         * priority climb below always stops at it - confirm */
1263        sn.info = PRIMASK;
1264        sn.r.n = glptr = NULL;
1265        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1266
        /* read tokens until one of the terminators shows up */
1267        while (!((tc = next_token(xtc)) & iexp)) {
1268
1269                if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1270                        /* input redirection (<) attached to glptr node */
1271                        debug_printf_parse("%s: input redir\n", __func__);
1272                        cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1273                        cn->a.n = glptr;
1274                        xtc = TC_OPERAND | TC_UOPPRE;
1275                        glptr = NULL;
1276
1277                } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1278                        debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
1279                        /* for binary and postfix-unary operators, jump back over
1280                         * previous operators with higher priority */
1281                        vn = cn;
1282                        while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1283                            || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1284                        ) {
1285                                vn = vn->a.n;
1286                        }
1287                        if ((t_info & OPCLSMASK) == OC_TERNARY)
1288                                t_info += P(6);
                        /* splice the new operator node in between vn and the
                         * node vn was attached under */
1289                        cn = vn->a.n->r.n = new_node(t_info);
1290                        cn->a.n = vn->a.n;
1291                        if (tc & TC_BINOP) {
                                /* binary: vn becomes the left operand */
1292                                cn->l.n = vn;
1293                                xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1294                                if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1295                                        /* it's a pipe */
1296                                        next_token(TC_GETLINE);
1297                                        /* give maximum priority to this pipe */
1298                                        cn->info &= ~PRIMASK;
1299                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1300                                }
1301                        } else {
                                /* postfix-unary: vn becomes the (right) operand */
1302                                cn->r.n = vn;
1303                                xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1304                        }
1305                        vn->a.n = cn;
1306
1307                } else {
1308                        debug_printf_parse("%s: other\n", __func__);
1309                        /* for operands and prefix-unary operators, attach them
1310                         * to last node */
1311                        vn = cn;
1312                        cn = vn->r.n = new_node(t_info);
1313                        cn->a.n = vn;
1314                        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1315                        if (tc & (TC_OPERAND | TC_REGEXP)) {
1316                                debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1317                                xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1318                                /* one should be very careful with switch on tclass -
1319                                 * only simple tclasses should be used! */
1320                                switch (tc) {
1321                                case TC_VARIABLE:
1322                                case TC_ARRAY:
1323                                        debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1324                                        cn->info = OC_VAR;
                                        /* a name found in ahash is referenced by its
                                         * index (x.aidx) as OC_FNARG; any other name
                                         * goes through newvar() */
1325                                        v = hash_search(ahash, t_string);
1326                                        if (v != NULL) {
1327                                                cn->info = OC_FNARG;
1328                                                cn->l.aidx = v->x.aidx;
1329                                        } else {
1330                                                cn->l.v = newvar(t_string);
1331                                        }
1332                                        if (tc & TC_ARRAY) {
                                                /* subscript expression, up to TC_ARRTERM */
1333                                                cn->info |= xS;
1334                                                cn->r.n = parse_expr(TC_ARRTERM);
1335                                        }
1336                                        break;
1337
1338                                case TC_NUMBER:
1339                                case TC_STRING:
1340                                        debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
                                        /* a literal becomes an OC_VAR node with its own
                                         * freshly allocated variable holding the value */
1341                                        cn->info = OC_VAR;
1342                                        v = cn->l.v = xzalloc(sizeof(var));
1343                                        if (tc & TC_NUMBER)
1344                                                setvar_i(v, t_double);
1345                                        else
1346                                                setvar_s(v, t_string);
1347                                        break;
1348
1349                                case TC_REGEXP:
1350                                        debug_printf_parse("%s: TC_REGEXP\n", __func__);
                                        /* room for two regex_t: mk_re_node uses re and re + 1 */
1351                                        mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1352                                        break;
1353
1354                                case TC_FUNCTION:
1355                                        debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1356                                        cn->info = OC_FUNC;
1357                                        cn->r.f = newfunc(t_string);
                                        /* parenthesized argument expression */
1358                                        cn->l.n = condition();
1359                                        break;
1360
1361                                case TC_SEQSTART:
1362                                        debug_printf_parse("%s: TC_SEQSTART\n", __func__);
                                        /* parenthesized subexpression: its subtree
                                         * replaces the node created above */
1363                                        cn = vn->r.n = parse_expr(TC_SEQTERM);
1364                                        if (!cn)
1365                                                syntax_error("Empty sequence");
1366                                        cn->a.n = vn;
1367                                        break;
1368
1369                                case TC_GETLINE:
1370                                        debug_printf_parse("%s: TC_GETLINE\n", __func__);
                                        /* remember the node: a following '<' is then
                                         * treated as input redirection (see loop top) */
1371                                        glptr = cn;
1372                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1373                                        break;
1374
1375                                case TC_BUILTIN:
1376                                        debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1377                                        cn->l.n = condition();
1378                                        break;
1379                                }
1380                        }
1381                }
1382        }
1383
1384        debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1385        return sn.r.n;
1386}
1387
1388/* add node to chain. Return ptr to alloc'd node */
1389static node *chain_node(uint32_t info)
1390{
1391        node *n;
1392
1393        if (!seq->first)
1394                seq->first = seq->last = new_node(0);
1395
1396        if (seq->programname != g_progname) {
1397                seq->programname = g_progname;
1398                n = chain_node(OC_NEWSOURCE);
1399                n->l.new_progname = xstrdup(g_progname);
1400        }
1401
1402        n = seq->last;
1403        n->info = info;
1404        seq->last = n->a.n = new_node(OC_DONE);
1405
1406        return n;
1407}
1408
1409static void chain_expr(uint32_t info)
1410{
1411        node *n;
1412
1413        n = chain_node(info);
1414        n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1415        if (t_tclass & TC_GRPTERM)
1416                rollback_token();
1417}
1418
1419static node *chain_loop(node *nn)
1420{
1421        node *n, *n2, *save_brk, *save_cont;
1422
1423        save_brk = break_ptr;
1424        save_cont = continue_ptr;
1425
1426        n = chain_node(OC_BR | Vx);
1427        continue_ptr = new_node(OC_EXEC);
1428        break_ptr = new_node(OC_EXEC);
1429        chain_group();
1430        n2 = chain_node(OC_EXEC | Vx);
1431        n2->l.n = nn;
1432        n2->a.n = n;
1433        continue_ptr->a.n = n2;
1434        break_ptr->a.n = n->r.n = seq->last;
1435
1436        continue_ptr = save_cont;
1437        break_ptr = save_brk;
1438
1439        return n;
1440}
1441
1442/* parse group and attach it to chain */
1443static void chain_group(void)
1444{
1445        uint32_t c;
1446        node *n, *n2, *n3;
1447
1448        do {
1449                c = next_token(TC_GRPSEQ);
1450        } while (c & TC_NEWLINE);
1451
1452        if (c & TC_GRPSTART) {
1453                debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1454                while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1455                        debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1456                        if (t_tclass & TC_NEWLINE)
1457                                continue;
1458                        rollback_token();
1459                        chain_group();
1460                }
1461                debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1462        } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1463                debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1464                rollback_token();
1465                chain_expr(OC_EXEC | Vx);
1466        } else {
1467                /* TC_STATEMNT */
1468                debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1469                switch (t_info & OPCLSMASK) {
1470                case ST_IF:
1471                        debug_printf_parse("%s: ST_IF\n", __func__);
1472                        n = chain_node(OC_BR | Vx);
1473                        n->l.n = condition();
1474                        chain_group();
1475                        n2 = chain_node(OC_EXEC);
1476                        n->r.n = seq->last;
1477                        if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1478                                chain_group();
1479                                n2->a.n = seq->last;
1480                        } else {
1481                                rollback_token();
1482                        }
1483                        break;
1484
1485                case ST_WHILE:
1486                        debug_printf_parse("%s: ST_WHILE\n", __func__);
1487                        n2 = condition();
1488                        n = chain_loop(NULL);
1489                        n->l.n = n2;
1490                        break;
1491
1492                case ST_DO:
1493                        debug_printf_parse("%s: ST_DO\n", __func__);
1494                        n2 = chain_node(OC_EXEC);
1495                        n = chain_loop(NULL);
1496                        n2->a.n = n->a.n;
1497                        next_token(TC_WHILE);
1498                        n->l.n = condition();
1499                        break;
1500
1501                case ST_FOR:
1502                        debug_printf_parse("%s: ST_FOR\n", __func__);
1503                        next_token(TC_SEQSTART);
1504                        n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1505                        if (t_tclass & TC_SEQTERM) {    /* for-in */
1506                                if ((n2->info & OPCLSMASK) != OC_IN)
1507                                        syntax_error(EMSG_UNEXP_TOKEN);
1508                                n = chain_node(OC_WALKINIT | VV);
1509                                n->l.n = n2->l.n;
1510                                n->r.n = n2->r.n;
1511                                n = chain_loop(NULL);
1512                                n->info = OC_WALKNEXT | Vx;
1513                                n->l.n = n2->l.n;
1514                        } else {                        /* for (;;) */
1515                                n = chain_node(OC_EXEC | Vx);
1516                                n->l.n = n2;
1517                                n2 = parse_expr(TC_SEMICOL);
1518                                n3 = parse_expr(TC_SEQTERM);
1519                                n = chain_loop(n3);
1520                                n->l.n = n2;
1521                                if (!n2)
1522                                        n->info = OC_EXEC;
1523                        }
1524                        break;
1525
1526                case OC_PRINT:
1527                case OC_PRINTF:
1528                        debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1529                        n = chain_node(t_info);
1530                        n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1531                        if (t_tclass & TC_OUTRDR) {
1532                                n->info |= t_info;
1533                                n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1534                        }
1535                        if (t_tclass & TC_GRPTERM)
1536                                rollback_token();
1537                        break;
1538
1539                case OC_BREAK:
1540                        debug_printf_parse("%s: OC_BREAK\n", __func__);
1541                        n = chain_node(OC_EXEC);
1542                        n->a.n = break_ptr;
1543                        chain_expr(t_info);
1544                        break;
1545
1546                case OC_CONTINUE:
1547                        debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1548                        n = chain_node(OC_EXEC);
1549                        n->a.n = continue_ptr;
1550                        chain_expr(t_info);
1551                        break;
1552
1553                /* delete, next, nextfile, return, exit */
1554                default:
1555                        debug_printf_parse("%s: default\n", __func__);
1556                        chain_expr(t_info);
1557                }
1558        }
1559}
1560
1561static void parse_program(char *p)
1562{
1563        uint32_t tclass;
1564        node *cn;
1565        func *f;
1566        var *v;
1567
1568        g_pos = p;
1569        t_lineno = 1;
1570        while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1571                        TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1572
1573                if (tclass & TC_OPTERM) {
1574                        debug_printf_parse("%s: TC_OPTERM\n", __func__);
1575                        continue;
1576                }
1577
1578                seq = &mainseq;
1579                if (tclass & TC_BEGIN) {
1580                        debug_printf_parse("%s: TC_BEGIN\n", __func__);
1581                        seq = &beginseq;
1582                        chain_group();
1583
1584                } else if (tclass & TC_END) {
1585                        debug_printf_parse("%s: TC_END\n", __func__);
1586                        seq = &endseq;
1587                        chain_group();
1588
1589                } else if (tclass & TC_FUNCDECL) {
1590                        debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1591                        next_token(TC_FUNCTION);
1592                        g_pos++;
1593                        f = newfunc(t_string);
1594                        f->body.first = NULL;
1595                        f->nargs = 0;
1596                        while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1597                                v = findvar(ahash, t_string);
1598                                v->x.aidx = f->nargs++;
1599
1600                                if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1601                                        break;
1602                        }
1603                        seq = &f->body;
1604                        chain_group();
1605                        clear_array(ahash);
1606
1607                } else if (tclass & TC_OPSEQ) {
1608                        debug_printf_parse("%s: TC_OPSEQ\n", __func__);
1609                        rollback_token();
1610                        cn = chain_node(OC_TEST);
1611                        cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1612                        if (t_tclass & TC_GRPSTART) {
1613                                debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1614                                rollback_token();
1615                                chain_group();
1616                        } else {
1617                                debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1618                                chain_node(OC_PRINT);
1619                        }
1620                        cn->r.n = mainseq.last;
1621
1622                } else /* if (tclass & TC_GRPSTART) */ {
1623                        debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1624                        rollback_token();
1625                        chain_group();
1626                }
1627        }
1628        debug_printf_parse("%s: TC_EOF\n", __func__);
1629}
1630
1631
1632/* -------- program execution part -------- */
1633
1634static node *mk_splitter(const char *s, tsplitter *spl)
1635{
1636        regex_t *re, *ire;
1637        node *n;
1638
1639        re = &spl->re[0];
1640        ire = &spl->re[1];
1641        n = &spl->n;
1642        if ((n->info & OPCLSMASK) == OC_REGEXP) {
1643                regfree(re);
1644                regfree(ire); // TODO: nuke ire, use re+1?
1645        }
1646        if (s[0] && s[1]) { /* strlen(s) > 1 */
1647                mk_re_node(s, n, re);
1648        } else {
1649                n->info = (uint32_t) s[0];
1650        }
1651
1652        return n;
1653}
1654
1655/* use node as a regular expression. Supplied with node ptr and regex_t
1656 * storage space. Return ptr to regex (if result points to preg, it should
1657 * be later regfree'd manually
1658 */
1659static regex_t *as_regex(node *op, regex_t *preg)
1660{
1661        int cflags;
1662        var *v;
1663        const char *s;
1664
1665        if ((op->info & OPCLSMASK) == OC_REGEXP) {
1666                return icase ? op->r.ire : op->l.re;
1667        }
1668        v = nvalloc(1);
1669        s = getvar_s(evaluate(op, v));
1670
1671        cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1672        /* Testcase where REG_EXTENDED fails (unpaired '{'):
1673         * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1674         * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1675         * (maybe gsub is not supposed to use REG_EXTENDED?).
1676         */
1677        if (regcomp(preg, s, cflags)) {
1678                cflags &= ~REG_EXTENDED;
1679                xregcomp(preg, s, cflags);
1680        }
1681        nvfree(v);
1682        return preg;
1683}
1684
/* Gradually growing buffer.
 *
 * Returns b unchanged while n < *size. Otherwise (also when b is NULL)
 * the buffer is reallocated to n + n/2 + 80 bytes and *size is updated.
 * Because reallocation also happens when n == *size, there is always at
 * least one byte available beyond offset n.
 */
static char* qrealloc(char *b, int n, int *size)
{
        if (b != NULL && n < *size)
                return b;

        *size = n + (n >> 1) + 80;
        return xrealloc(b, *size);
}
1697
1698/* resize field storage space */
1699static void fsrealloc(int size)
1700{
1701        int i;
1702
1703        if (size >= maxfields) {
1704                i = maxfields;
1705                maxfields = size + 16;
1706                Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1707                for (; i < maxfields; i++) {
1708                        Fields[i].type = VF_SPECIAL;
1709                        Fields[i].string = NULL;
1710                }
1711        }
1712        /* if size < nfields, clear extra field variables */
1713        for (i = size; i < nfields; i++) {
1714                clrvar(Fields + i);
1715        }
1716        nfields = size;
1717}
1718
/* Split string s into fields according to splitter node spl.
 *
 * A zeroed buffer of strlen(s)*2+3 bytes is allocated and handed back
 * through *slist; the fields are stored in it one after another, each
 * NUL-terminated. Callers in this file free() that buffer themselves.
 * Returns the number of fields.
 *
 * The mode is selected by spl->info:
 *  - OC_REGEXP node: fields are delimited by matches of the node's regex
 *    (the case-insensitive variant when icase is set);
 *  - character NUL: every character of s becomes its own field;
 *  - any single character except ' ': that character delimits fields
 *    (its upper- and lowercase forms when icase is set);
 *  - ' ': fields are the runs of non-whitespace characters.
 * When RS is the empty string, c[2] is set to '\n'; the regex mode then
 * uses it as scan limit and the single-character mode as an additional
 * delimiter.
 */
1719static int awk_split(const char *s, node *spl, char **slist)
1720{
1721        int l, n;
1722        char c[4];
1723        char *s1;
1724        regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1725
1726        /* in worst case, each char would be a separate field */
1727        *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1728        strcpy(s1, s);
1729
        /* c is used as a small NUL-terminated set of delimiter characters:
         * c[0], c[1] hold the separator char, c+2 is "" or "\n" */
1730        c[0] = c[1] = (char)spl->info;
1731        c[2] = c[3] = '\0';
1732        if (*getvar_s(intvar[RS]) == '\0')
1733                c[2] = '\n';
1734
1735        n = 0;
1736        if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1737                if (!*s)
1738                        return n; /* "": zero fields */
1739                n++; /* at least one field will be there */
1740                do {
1741                        l = strcspn(s, c+2); /* len till next NUL or \n */
1742                        if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1743                         && pmatch[0].rm_so <= l
1744                        ) {
                                /* delimiter found at or before the scan limit:
                                 * the field ends where the match starts */
1745                                l = pmatch[0].rm_so;
                                /* empty match at the very start: consume one
                                 * character so that the loop makes progress */
1746                                if (pmatch[0].rm_eo == 0) {
1747                                        l++;
1748                                        pmatch[0].rm_eo++;
1749                                }
1750                                n++; /* we saw yet another delimiter */
1751                        } else {
                                /* no usable match: the field runs up to the scan
                                 * limit; skip the limiting char unless it is NUL */
1752                                pmatch[0].rm_eo = l;
1753                                if (s[l])
1754                                        pmatch[0].rm_eo++;
1755                        }
1756                        memcpy(s1, s, l);
1757                        /* make sure we remove *all* of the separator chars */
1758                        do {
1759                                s1[l] = '\0';
1760                        } while (++l < pmatch[0].rm_eo);
1761                        nextword(&s1);
1762                        s += pmatch[0].rm_eo;
1763                } while (*s);
1764                return n;
1765        }
1766        if (c[0] == '\0') {  /* null split */
                /* output is "x\0y\0z\0...": one single-char string per input char */
1767                while (*s) {
1768                        *s1++ = *s++;
1769                        *s1++ = '\0';
1770                        n++;
1771                }
1772                return n;
1773        }
1774        if (c[0] != ' ') {  /* single-character split */
1775                if (icase) {
1776                        c[0] = toupper(c[0]);
1777                        c[1] = tolower(c[1]);
1778                }
                /* works in place on the copy of s made above: every delimiter
                 * is overwritten with NUL; a non-empty input has one field
                 * more than it has delimiters */
1779                if (*s1)
1780                        n++;
1781                while ((s1 = strpbrk(s1, c)) != NULL) {
1782                        *s1++ = '\0';
1783                        n++;
1784                }
1785                return n;
1786        }
1787        /* space split */
1788        while (*s) {
1789                s = skip_whitespace(s);
1790                if (!*s)
1791                        break;
1792                n++;
1793                while (*s && !isspace(*s))
1794                        *s1++ = *s++;
1795                *s1++ = '\0';
1796        }
1797        return n;
1798}
1799
1800static void split_f0(void)
1801{
1802/* static char *fstrings; */
1803#define fstrings (G.split_f0__fstrings)
1804
1805        int i, n;
1806        char *s;
1807
1808        if (is_f0_split)
1809                return;
1810
1811        is_f0_split = TRUE;
1812        free(fstrings);
1813        fsrealloc(0);
1814        n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1815        fsrealloc(n);
1816        s = fstrings;
1817        for (i = 0; i < n; i++) {
1818                Fields[i].string = nextword(&s);
1819                Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1820        }
1821
1822        /* set NF manually to avoid side effects */
1823        clrvar(intvar[NF]);
1824        intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1825        intvar[NF]->number = nfields;
1826#undef fstrings
1827}
1828
1829/* perform additional actions when some internal variables changed */
1830static void handle_special(var *v)
1831{
1832        int n;
1833        char *b;
1834        const char *sep, *s;
1835        int sl, l, len, i, bsize;
1836
1837        if (!(v->type & VF_SPECIAL))
1838                return;
1839
1840        if (v == intvar[NF]) {
1841                n = (int)getvar_i(v);
1842                fsrealloc(n);
1843
1844                /* recalculate $0 */
1845                sep = getvar_s(intvar[OFS]);
1846                sl = strlen(sep);
1847                b = NULL;
1848                len = 0;
1849                for (i = 0; i < n; i++) {
1850                        s = getvar_s(&Fields[i]);
1851                        l = strlen(s);
1852                        if (b) {
1853                                memcpy(b+len, sep, sl);
1854                                len += sl;
1855                        }
1856                        b = qrealloc(b, len+l+sl, &bsize);
1857                        memcpy(b+len, s, l);
1858                        len += l;
1859                }
1860                if (b)
1861                        b[len] = '\0';
1862                setvar_p(intvar[F0], b);
1863                is_f0_split = TRUE;
1864
1865        } else if (v == intvar[F0]) {
1866                is_f0_split = FALSE;
1867
1868        } else if (v == intvar[FS]) {
1869                /*
1870                 * The POSIX-2008 standard says that changing FS should have no effect on the
1871                 * current input line, but only on the next one. The language is:
1872                 *
1873                 * > Before the first reference to a field in the record is evaluated, the record
1874                 * > shall be split into fields, according to the rules in Regular Expressions,
1875                 * > using the value of FS that was current at the time the record was read.
1876                 *
1877                 * So, split up current line before assignment to FS:
1878                 */
1879                split_f0();
1880
1881                mk_splitter(getvar_s(v), &fsplitter);
1882
1883        } else if (v == intvar[RS]) {
1884                mk_splitter(getvar_s(v), &rsplitter);
1885
1886        } else if (v == intvar[IGNORECASE]) {
1887                icase = istrue(v);
1888
1889        } else {                                /* $n */
1890                n = getvar_i(intvar[NF]);
1891                setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1892                /* right here v is invalid. Just to note... */
1893        }
1894}
1895
1896/* step through func/builtin/etc arguments */
1897static node *nextarg(node **pn)
1898{
1899        node *n;
1900
1901        n = *pn;
1902        if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1903                *pn = n->r.n;
1904                n = n->l.n;
1905        } else {
1906                *pn = NULL;
1907        }
1908        return n;
1909}
1910
1911static void hashwalk_init(var *v, xhash *array)
1912{
1913        hash_item *hi;
1914        unsigned i;
1915        walker_list *w;
1916        walker_list *prev_walker;
1917
1918        if (v->type & VF_WALK) {
1919                prev_walker = v->x.walker;
1920        } else {
1921                v->type |= VF_WALK;
1922                prev_walker = NULL;
1923        }
1924        debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1925
1926        w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1927        debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1928        w->cur = w->end = w->wbuf;
1929        w->prev = prev_walker;
1930        for (i = 0; i < array->csize; i++) {
1931                hi = array->items[i];
1932                while (hi) {
1933                        strcpy(w->end, hi->name);
1934                        nextword(&w->end);
1935                        hi = hi->next;
1936                }
1937        }
1938}
1939
1940static int hashwalk_next(var *v)
1941{
1942        walker_list *w = v->x.walker;
1943
1944        if (w->cur >= w->end) {
1945                walker_list *prev_walker = w->prev;
1946
1947                debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1948                free(w);
1949                v->x.walker = prev_walker;
1950                return FALSE;
1951        }
1952
1953        setvar_s(v, nextword(&w->cur));
1954        return TRUE;
1955}
1956
1957/* evaluate node, return 1 when result is true, 0 otherwise */
1958static int ptest(node *pattern)
1959{
1960        /* ptest__v is "static": to save stack space? */
1961        return istrue(evaluate(pattern, &G.ptest__v));
1962}
1963
1964/* read next record from stream rsm into a variable v */
/* The record separator comes from rsplitter: a regex node, a single
 * character, or NUL ("paragraph mode": records end at a run of two or
 * more newlines, and leading newlines are skipped).
 *
 * On success v receives the record text (and is flagged VF_USER) and RT
 * receives the text that terminated the record.
 * Returns 1 if a record was read, 0 if there was no more data, and -1 if
 * the read failed (ERRNO is set from errno in that case).
 *
 * State kept in rsm between calls: buffer (the data), size (allocated
 * bytes), adv (offset of the unconsumed data inside buffer) and pos
 * (number of unconsumed bytes).
 */
1965static int awk_getline(rstream *rsm, var *v)
1966{
1967        char *b;
1968        regmatch_t pmatch[2];
1969        int size, a, p, pp = 0;
1970        int fd, so, eo, r, rp;
1971        char c, *m, *s;
1972
1973        debug_printf_eval("entered %s()\n", __func__);
1974
1975        /* we're using our own buffer since we need access to accumulating
1976         * characters
1977         */
1978        fd = fileno(rsm->F);
1979        m = rsm->buffer;
1980        a = rsm->adv;
1981        p = rsm->pos;
1982        size = rsm->size;
1983        c = (char) rsplitter.n.info;
1984        rp = 0;
1985
1986        if (!m)
1987                m = qrealloc(m, 256, &size);
1988
        /* Each pass looks for a separator in the p bytes at b; if none is
         * found (or the match might still grow), more data is read and the
         * search is repeated. so/eo are the offsets of the start and end of
         * the separator; they default to p, i.e. "record is all we have".
         */
1989        do {
1990                b = m + a;
1991                so = eo = p;
1992                r = 1;
1993                if (p > 0) {
1994                        if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1995                                if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1996                                                        b, 1, pmatch, 0) == 0) {
1997                                        so = pmatch[0].rm_so;
1998                                        eo = pmatch[0].rm_eo;
                                        /* a match that touches the end of the data is
                                         * not accepted yet: keep reading */
1999                                        if (b[eo] != '\0')
2000                                                break;
2001                                }
2002                        } else if (c != '\0') {
                                /* only the bytes added by the last read (from pp)
                                 * need scanning; an embedded NUL byte also ends
                                 * the record */
2003                                s = strchr(b+pp, c);
2004                                if (!s)
2005                                        s = memchr(b+pp, '\0', p - pp);
2006                                if (s) {
2007                                        so = eo = s-b;
2008                                        eo++;
2009                                        break;
2010                                }
2011                        } else {
                                /* rp: number of leading newlines to leave out of
                                 * the record */
2012                                while (b[rp] == '\n')
2013                                        rp++;
2014                                s = strstr(b+rp, "\n\n");
2015                                if (s) {
2016                                        so = eo = s-b;
2017                                        while (b[eo] == '\n')
2018                                                eo++;
2019                                        if (b[eo] != '\0')
2020                                                break;
2021                                }
2022                        }
2023                }
2024
                /* move the unconsumed data (p bytes plus its NUL) to the start
                 * of the buffer before appending to it */
2025                if (a > 0) {
2026                        memmove(m, m+a, p+1);
2027                        b = m;
2028                        a = 0;
2029                }
2030
2031                m = qrealloc(m, a+p+128, &size);
2032                b = m + a;
2033                pp = p;
2034                p += safe_read(fd, b+p, size-p-1);
                /* p went down, i.e. safe_read() returned a negative value */
2035                if (p < pp) {
2036                        p = 0;
2037                        r = 0;
2038                        setvar_i(intvar[ERRNO], errno);
2039                }
2040                b[p] = '\0';
2041
2042        } while (p > pp);
2043
2044        if (p == 0) {
                /* nothing to return: r becomes 0 (no data) or -1 (read error) */
2045                r--;
2046        } else {
                /* temporarily NUL-terminate the record and the separator inside
                 * the buffer to copy them out, then restore the bytes */
2047                c = b[so]; b[so] = '\0';
2048                setvar_s(v, b+rp);
2049                v->type |= VF_USER;
2050                b[so] = c;
2051                c = b[eo]; b[eo] = '\0';
2052                setvar_s(intvar[RT], b+so);
2053                b[eo] = c;
2054        }
2055
        /* the next call continues right after the separator */
2056        rsm->buffer = m;
2057        rsm->adv = a + eo;
2058        rsm->pos = p - eo;
2059        rsm->size = size;
2060
2061        debug_printf_eval("returning from %s(): %d\n", __func__, r);
2062
2063        return r;
2064}
2065
2066static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2067{
2068        int r = 0;
2069        char c;
2070        const char *s = format;
2071
2072        if (int_as_int && n == (long long)n) {
2073                r = snprintf(b, size, "%lld", (long long)n);
2074        } else {
2075                do { c = *s; } while (c && *++s);
2076                if (strchr("diouxX", c)) {
2077                        r = snprintf(b, size, format, (int)n);
2078                } else if (strchr("eEfgG", c)) {
2079                        r = snprintf(b, size, format, n);
2080                } else {
2081                        syntax_error(EMSG_INV_FMT);
2082                }
2083        }
2084        return r;
2085}
2086
2087/* formatted output into an allocated buffer, return ptr to buffer */
/* n is the argument list: its first argument is evaluated to obtain the
 * format string, the following ones supply the values.
 *
 * The format is processed in chunks, each consisting of literal text
 * followed by at most one conversion specification. A chunk is cut out of
 * the (duplicated) format by temporarily storing NUL after it, and is then
 * handed to sprintf()/fmt_num() together with the next argument.
 * "%*..." is rejected with a syntax error.
 *
 * NOTE(review): the space reserved per chunk is the chunk length plus
 * MAXVARFMT (plus the string length for %s); an explicit field width or
 * precision in the format does not appear to be taken into account before
 * the sprintf() calls - confirm.
 */
2088static char *awk_printf(node *n)
2089{
2090        char *b = NULL;
2091        char *fmt, *s, *f;
2092        const char *s1;
2093        int i, j, incr, bsize;
2094        char c, c1;
2095        var *v, *arg;
2096
2097        v = nvalloc(1);
2098        fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
2099
        /* i: number of bytes produced in b so far */
2100        i = 0;
2101        while (*f) {
2102                s = f;
                /* advance to the char after the next '%', stepping over "%%" */
2103                while (*f && (*f != '%' || *++f == '%'))
2104                        f++;
                /* skip flags/width/precision up to the conversion letter */
2105                while (*f && !isalpha(*f)) {
2106                        if (*f == '*')
2107                                syntax_error("%*x formats are not supported");
2108                        f++;
2109                }
2110
2111                incr = (f - s) + MAXVARFMT;
2112                b = qrealloc(b, incr + i, &bsize);
                /* c: conversion letter ('\0' if the chunk is pure text);
                 * c1: the char that gets overwritten to terminate the chunk */
2113                c = *f;
2114                if (c != '\0')
2115                        f++;
2116                c1 = *f;
2117                *f = '\0';
2118                arg = evaluate(nextarg(&n), v);
2119
2120                j = i;
2121                if (c == 'c' || !c) {
                        /* numeric argument: character with that code;
                         * otherwise: first character of the string */
2122                        i += sprintf(b+i, s, is_numeric(arg) ?
2123                                        (char)getvar_i(arg) : *getvar_s(arg));
2124                } else if (c == 's') {
2125                        s1 = getvar_s(arg);
2126                        b = qrealloc(b, incr+i+strlen(s1), &bsize);
2127                        i += sprintf(b+i, s, s1);
2128                } else {
2129                        i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2130                }
                /* undo the temporary cut */
2131                *f = c1;
2132
2133                /* if there was an error while sprintf, return value is negative */
2134                if (i < j)
2135                        i = j;
2136        }
2137
2138        free(fmt);
2139        nvfree(v);
        /* trim the buffer to the produced length and terminate it; this also
         * allocates the buffer when the format was empty */
2140        b = xrealloc(b, i + 1);
2141        b[i] = '\0';
2142        return b;
2143}
2144
2145/* Common substitution routine.
2146 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2147 * store result into (dest), return number of substitutions.
2148 * If nm = 0, replace all matches.
2149 * If src or dst is NULL, use $0.
2150 * If subexp != 0, enable subexpression matching (\1-\9).
2151 */
/* Handling of the replacement text, as implemented below:
 *  - '&' preceded by an even number of backslashes (including none) is
 *    replaced by the matched text; preceded by an odd number it stays a
 *    literal '&';
 *  - with subexp set, a digit preceded by an odd number of backslashes is
 *    replaced by the text of that subexpression; otherwise it stays a
 *    literal digit;
 *  - in both cases roughly half of the preceding backslashes are dropped
 *    (see the residx adjustment).
 * Note that the returned count is the number of matches seen (match_no),
 * which for nm > 1 includes the matches that were skipped.
 */
2152static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2153{
2154        char *resbuf;
2155        const char *sp;
2156        int match_no, residx, replen, resbufsize;
2157        int regexec_flags;
2158        regmatch_t pmatch[10];
2159        regex_t sreg, *regex;
2160
2161        resbuf = NULL;
2162        residx = 0;
2163        match_no = 0;
2164        regexec_flags = 0;
2165        regex = as_regex(rn, &sreg);
2166        sp = getvar_s(src ? src : intvar[F0]);
2167        replen = strlen(repl);
        /* sp: unprocessed rest of the source; residx: bytes written to resbuf */
2168        while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2169                int so = pmatch[0].rm_so;
2170                int eo = pmatch[0].rm_eo;
2171
2172                //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
                /* first copy everything up to the end of the match; the match
                 * itself is taken back again below if it gets replaced */
2173                resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2174                memcpy(resbuf + residx, sp, eo);
2175                residx += eo;
2176                if (++match_no >= nm) {
2177                        const char *s;
2178                        int nbs;
2179
2180                        /* replace */
2181                        residx -= (eo - so);
                        /* nbs: number of consecutive backslashes just copied */
2182                        nbs = 0;
2183                        for (s = repl; *s; s++) {
2184                                char c = resbuf[residx++] = *s;
2185                                if (c == '\\') {
2186                                        nbs++;
2187                                        continue;
2188                                }
2189                                if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2190                                        int j;
                                        /* back up over the special char itself and
                                         * over half of the backslashes before it
                                         * (rounded up) */
2191                                        residx -= ((nbs + 3) >> 1);
2192                                        j = 0;
2193                                        if (c != '&') {
2194                                                j = c - '0';
2195                                                nbs++;
2196                                        }
2197                                        if (nbs % 2) {
                                                /* escaped: keep the char literally */
2198                                                resbuf[residx++] = c;
2199                                        } else {
                                                /* insert match (j == 0) or subexpression j */
2200                                                int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2201                                                resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2202                                                memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2203                                                residx += n;
2204                                        }
2205                                }
2206                                nbs = 0;
2207                        }
2208                }
2209
                /* subsequent searches do not start at the beginning of the line */
2210                regexec_flags = REG_NOTBOL;
2211                sp += eo;
2212                if (match_no == nm)
2213                        break;
2214                if (eo == so) {
2215                        /* Empty match (e.g. "b*" will match anywhere).
2216                         * Advance by one char. */
2217//BUG (bug 1333):
2218//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2219//... and will erroneously match "b" even though it is NOT at the word start.
2220//we need REG_NOTBOW but it does not exist...
2221//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2222//it should be able to do it correctly.
2223                        /* Subtle: this is safe only because
2224                         * qrealloc allocated at least one extra byte */
2225                        resbuf[residx] = *sp;
2226                        if (*sp == '\0')
2227                                goto ret;
2228                        sp++;
2229                        residx++;
2230                }
2231        }
2232
        /* append the rest of the source (including its terminating NUL) */
2233        resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2234        strcpy(resbuf + residx, sp);
2235 ret:
2236        //bb_error_msg("end sp:'%s'%p", sp,sp);
2237        setvar_p(dest ? dest : intvar[F0], resbuf);
        /* as_regex() compiled into sreg only if it returned &sreg */
2238        if (regex == &sreg)
2239                regfree(regex);
2240        return match_no;
2241}
2242
2243static NOINLINE int do_mktime(const char *ds)
2244{
2245        struct tm then;
2246        int count;
2247
2248        /*memset(&then, 0, sizeof(then)); - not needed */
2249        then.tm_isdst = -1; /* default is unknown */
2250
2251        /* manpage of mktime says these fields are ints,
2252         * so we can sscanf stuff directly into them */
2253        count = sscanf(ds, "%u %u %u %u %u %u %d",
2254                &then.tm_year, &then.tm_mon, &then.tm_mday,
2255                &then.tm_hour, &then.tm_min, &then.tm_sec,
2256                &then.tm_isdst);
2257
2258        if (count < 6
2259         || (unsigned)then.tm_mon < 1
2260         || (unsigned)then.tm_year < 1900
2261        ) {
2262                return -1;
2263        }
2264
2265        then.tm_mon -= 1;
2266        then.tm_year -= 1900;
2267
2268        return mktime(&then);
2269}
2270
/* Execute the builtin function described by node op and store its value
 * in res; res is also the return value.
 *
 * op->info encodes, as used below:
 *  - bits 30..31: the minimum number of arguments (fewer is a syntax error);
 *  - a per-argument flag field, shifted right by one for each successive
 *    argument: mask 0x09000000 requests evaluation of the argument into
 *    av[i], mask 0x08000000 additionally its string value in as[i];
 *  - OPNMASK: which builtin to run (the B_xx case labels).
 * Up to four arguments are collected: an[] holds the argument nodes,
 * av[] the evaluated values and as[] the string values. Only av[2] and
 * av[3] are preset to NULL; other entries are valid only if the flags and
 * the argument count provided them.
 */
2271static NOINLINE var *exec_builtin(node *op, var *res)
2272{
2273#define tspl (G.exec_builtin__tspl)
2274
2275        var *tv;
2276        node *an[4];
2277        var *av[4];
2278        const char *as[4];
2279        regmatch_t pmatch[2];
2280        regex_t sreg, *re;
2281        node *spl;
2282        uint32_t isr, info;
2283        int nargs;
2284        time_t tt;
2285        int i, l, ll, n;
2286
        /* tv: scratch variables the arguments are evaluated into */
2287        tv = nvalloc(4);
2288        isr = info = op->info;
2289        op = op->l.n;
2290
2291        av[2] = av[3] = NULL;
2292        for (i = 0; i < 4 && op; i++) {
2293                an[i] = nextarg(&op);
2294                if (isr & 0x09000000)
2295                        av[i] = evaluate(an[i], &tv[i]);
2296                if (isr & 0x08000000)
2297                        as[i] = getvar_s(av[i]);
2298                isr >>= 1;
2299        }
2300
2301        nargs = i;
2302        if ((uint32_t)nargs < (info >> 30))
2303                syntax_error(EMSG_TOO_FEW_ARGS);
2304
2305        info &= OPNMASK;
2306        switch (info) {
2307
        /* atan2(av[0], av[1]); needs math support */
2308        case B_a2:
2309                if (ENABLE_FEATURE_AWK_LIBM)
2310                        setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2311                else
2312                        syntax_error(EMSG_NO_MATH);
2313                break;
2314
        /* split as[0] into array av[1] (indices 1..n), result is n.
         * The optional third argument is the separator: a regex node is
         * used directly, anything else is converted by mk_splitter();
         * without it the current field splitter is used */
2315        case B_sp: {
2316                char *s, *s1;
2317
2318                if (nargs > 2) {
2319                        spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2320                                an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2321                } else {
2322                        spl = &fsplitter.n;
2323                }
2324
2325                n = awk_split(as[0], spl, &s);
2326                s1 = s;
2327                clear_array(iamarray(av[1]));
2328                for (i = 1; i <= n; i++)
2329                        setari_u(av[1], i, nextword(&s));
2330                free(s1);
2331                setvar_i(res, n);
2332                break;
2333        }
2334
        /* substring of as[0] starting at 1-based position av[1], with
         * optional length av[2]; the start is clamped to [0, length] and a
         * negative length is treated as 0 */
2335        case B_ss: {
2336                char *s;
2337
2338                l = strlen(as[0]);
2339                i = getvar_i(av[1]) - 1;
2340                if (i > l)
2341                        i = l;
2342                if (i < 0)
2343                        i = 0;
2344                n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2345                if (n < 0)
2346                        n = 0;
2347                s = xstrndup(as[0]+i, n);
2348                setvar_p(res, s);
2349                break;
2350        }
2351
2352        /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2353         * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
        /* bitwise AND */
2354        case B_an:
2355                setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2356                break;
2357
        /* bitwise complement */
2358        case B_co:
2359                setvar_i(res, ~getvar_i_int(av[0]));
2360                break;
2361
        /* left shift */
2362        case B_ls:
2363                setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2364                break;
2365
        /* bitwise OR */
2366        case B_or:
2367                setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2368                break;
2369
        /* right shift */
2370        case B_rs:
2371                setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2372                break;
2373
        /* bitwise XOR */
2374        case B_xo:
2375                setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2376                break;
2377
        /* lower-/uppercase copy of as[0]; only the letters a-z/A-Z are
         * converted, all other bytes are copied unchanged */
2378        case B_lo:
2379        case B_up: {
2380                char *s, *s1;
2381                s1 = s = xstrdup(as[0]);
2382                while (*s1) {
2383                        //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2384                        if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2385                                *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2386                        s1++;
2387                }
2388                setvar_p(res, s);
2389                break;
2390        }
2391
        /* 1-based position of the first occurrence of as[1] in as[0], or 0
         * (also 0 if as[1] is empty); case-insensitive when icase is set */
2392        case B_ix:
2393                n = 0;
2394                ll = strlen(as[1]);
2395                l = strlen(as[0]) - ll;
2396                if (ll > 0 && l >= 0) {
2397                        if (!icase) {
2398                                char *s = strstr(as[0], as[1]);
2399                                if (s)
2400                                        n = (s - as[0]) + 1;
2401                        } else {
2402                                /* this piece of code is terribly slow and
2403                                 * really should be rewritten
2404                                 */
2405                                for (i = 0; i <= l; i++) {
2406                                        if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2407                                                n = i+1;
2408                                                break;
2409                                        }
2410                                }
2411                        }
2412                }
2413                setvar_i(res, n);
2414                break;
2415
        /* format a timestamp with strftime(): as[0] is the optional format,
         * av[1] the optional time (default: now); output is limited to
         * MAXVARFMT bytes of g_buf */
2416        case B_ti:
2417                if (nargs > 1)
2418                        tt = getvar_i(av[1]);
2419                else
2420                        time(&tt);
2421                //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2422                i = strftime(g_buf, MAXVARFMT,
2423                        ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2424                        localtime(&tt));
2425                g_buf[i] = '\0';
2426                setvar_s(res, g_buf);
2427                break;
2428
        /* date string to timestamp, see do_mktime() */
2429        case B_mt:
2430                setvar_i(res, do_mktime(as[0]));
2431                break;
2432
        /* match as[0] against regex an[1]: result and RSTART are the
         * 1-based start of the match (0 if none), RLENGTH its length
         * (-1 if none) */
2433        case B_ma:
2434                re = as_regex(an[1], &sreg);
2435                n = regexec(re, as[0], 1, pmatch, 0);
2436                if (n == 0) {
2437                        pmatch[0].rm_so++;
2438                        pmatch[0].rm_eo++;
2439                } else {
2440                        pmatch[0].rm_so = 0;
2441                        pmatch[0].rm_eo = -1;
2442                }
2443                setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2444                setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2445                setvar_i(res, pmatch[0].rm_so);
2446                if (re == &sreg)
2447                        regfree(re);
2448                break;
2449
        /* substitution with subexpression support: replace match number
         * av[2] in av[3] and store the resulting string in res */
2450        case B_ge:
2451                awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2452                break;
2453
        /* replace all matches in av[2] in place; result is the count */
2454        case B_gs:
2455                setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2456                break;
2457
        /* replace the first match in av[2] in place; result is the count */
2458        case B_su:
2459                setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2460                break;
2461        }
2462
2463        nvfree(tv);
2464        return res;
2465#undef tspl
2466}
2467
2468/*
2469 * Evaluate node - the heart of the program. Supplied with subtree
2470 * and place where to store result. returns ptr to result.
2471 */
2472#define XC(n) ((n) >> 8)
2473
2474static var *evaluate(node *op, var *res)
2475{
2476/* This procedure is recursive so we should count every byte */
2477#define fnargs (G.evaluate__fnargs)
2478/* seed is initialized to 1 */
2479#define seed   (G.evaluate__seed)
2480#define sreg   (G.evaluate__sreg)
2481
2482        var *v1;
2483
2484        if (!op)
2485                return setvar_s(res, NULL);
2486
2487        debug_printf_eval("entered %s()\n", __func__);
2488
2489        v1 = nvalloc(2);
2490
2491        while (op) {
2492                struct {
2493                        var *v;
2494                        const char *s;
2495                } L = L; /* for compiler */
2496                struct {
2497                        var *v;
2498                        const char *s;
2499                } R = R;
2500                double L_d = L_d;
2501                uint32_t opinfo;
2502                int opn;
2503                node *op1;
2504
2505                opinfo = op->info;
2506                opn = (opinfo & OPNMASK);
2507                g_lineno = op->lineno;
2508                op1 = op->l.n;
2509                debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2510
2511                /* execute inevitable things */
2512                if (opinfo & OF_RES1)
2513                        L.v = evaluate(op1, v1);
2514                if (opinfo & OF_RES2)
2515                        R.v = evaluate(op->r.n, v1+1);
2516                if (opinfo & OF_STR1) {
2517                        L.s = getvar_s(L.v);
2518                        debug_printf_eval("L.s:'%s'\n", L.s);
2519                }
2520                if (opinfo & OF_STR2) {
2521                        R.s = getvar_s(R.v);
2522                        debug_printf_eval("R.s:'%s'\n", R.s);
2523                }
2524                if (opinfo & OF_NUM1) {
2525                        L_d = getvar_i(L.v);
2526                        debug_printf_eval("L_d:%f\n", L_d);
2527                }
2528
2529                debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2530                switch (XC(opinfo & OPCLSMASK)) {
2531
2532                /* -- iterative node type -- */
2533
2534                /* test pattern */
2535                case XC( OC_TEST ):
2536                        if ((op1->info & OPCLSMASK) == OC_COMMA) {
2537                                /* it's range pattern */
2538                                if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2539                                        op->info |= OF_CHECKED;
2540                                        if (ptest(op1->r.n))
2541                                                op->info &= ~OF_CHECKED;
2542                                        op = op->a.n;
2543                                } else {
2544                                        op = op->r.n;
2545                                }
2546                        } else {
2547                                op = ptest(op1) ? op->a.n : op->r.n;
2548                        }
2549                        break;
2550
2551                /* just evaluate an expression, also used as unconditional jump */
2552                case XC( OC_EXEC ):
2553                        break;
2554
2555                /* branch, used in if-else and various loops */
2556                case XC( OC_BR ):
2557                        op = istrue(L.v) ? op->a.n : op->r.n;
2558                        break;
2559
2560                /* initialize for-in loop */
2561                case XC( OC_WALKINIT ):
2562                        hashwalk_init(L.v, iamarray(R.v));
2563                        break;
2564
2565                /* get next array item */
2566                case XC( OC_WALKNEXT ):
2567                        op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2568                        break;
2569
2570                case XC( OC_PRINT ):
2571                case XC( OC_PRINTF ): {
2572                        FILE *F = stdout;
2573
2574                        if (op->r.n) {
2575                                rstream *rsm = newfile(R.s);
2576                                if (!rsm->F) {
2577                                        if (opn == '|') {
2578                                                rsm->F = popen(R.s, "w");
2579                                                if (rsm->F == NULL)
2580                                                        bb_perror_msg_and_die("popen");
2581                                                rsm->is_pipe = 1;
2582                                        } else {
2583                                                rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2584                                        }
2585                                }
2586                                F = rsm->F;
2587                        }
2588
2589                        if ((opinfo & OPCLSMASK) == OC_PRINT) {
2590                                if (!op1) {
2591                                        fputs(getvar_s(intvar[F0]), F);
2592                                } else {
2593                                        while (op1) {
2594                                                var *v = evaluate(nextarg(&op1), v1);
2595                                                if (v->type & VF_NUMBER) {
2596                                                        fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2597                                                                        getvar_i(v), TRUE);
2598                                                        fputs(g_buf, F);
2599                                                } else {
2600                                                        fputs(getvar_s(v), F);
2601                                                }
2602
2603                                                if (op1)
2604                                                        fputs(getvar_s(intvar[OFS]), F);
2605                                        }
2606                                }
2607                                fputs(getvar_s(intvar[ORS]), F);
2608
2609                        } else {        /* OC_PRINTF */
2610                                char *s = awk_printf(op1);
2611                                fputs(s, F);
2612                                free(s);
2613                        }
2614                        fflush(F);
2615                        break;
2616                }
2617
2618                case XC( OC_DELETE ): {
2619                        uint32_t info = op1->info & OPCLSMASK;
2620                        var *v;
2621
2622                        if (info == OC_VAR) {
2623                                v = op1->l.v;
2624                        } else if (info == OC_FNARG) {
2625                                v = &fnargs[op1->l.aidx];
2626                        } else {
2627                                syntax_error(EMSG_NOT_ARRAY);
2628                        }
2629
2630                        if (op1->r.n) {
2631                                const char *s;
2632                                clrvar(L.v);
2633                                s = getvar_s(evaluate(op1->r.n, v1));
2634                                hash_remove(iamarray(v), s);
2635                        } else {
2636                                clear_array(iamarray(v));
2637                        }
2638                        break;
2639                }
2640
2641                case XC( OC_NEWSOURCE ):
2642                        g_progname = op->l.new_progname;
2643                        break;
2644
2645                case XC( OC_RETURN ):
2646                        copyvar(res, L.v);
2647                        break;
2648
2649                case XC( OC_NEXTFILE ):
2650                        nextfile = TRUE;
2651                case XC( OC_NEXT ):
2652                        nextrec = TRUE;
2653                case XC( OC_DONE ):
2654                        clrvar(res);
2655                        break;
2656
2657                case XC( OC_EXIT ):
2658                        awk_exit(L_d);
2659
2660                /* -- recursive node type -- */
2661
2662                case XC( OC_VAR ):
2663                        L.v = op->l.v;
2664                        if (L.v == intvar[NF])
2665                                split_f0();
2666                        goto v_cont;
2667
2668                case XC( OC_FNARG ):
2669                        L.v = &fnargs[op->l.aidx];
2670 v_cont:
2671                        res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2672                        break;
2673
2674                case XC( OC_IN ):
2675                        setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2676                        break;
2677
2678                case XC( OC_REGEXP ):
2679                        op1 = op;
2680                        L.s = getvar_s(intvar[F0]);
2681                        goto re_cont;
2682
2683                case XC( OC_MATCH ):
2684                        op1 = op->r.n;
2685 re_cont:
2686                        {
2687                                regex_t *re = as_regex(op1, &sreg);
2688                                int i = regexec(re, L.s, 0, NULL, 0);
2689                                if (re == &sreg)
2690                                        regfree(re);
2691                                setvar_i(res, (i == 0) ^ (opn == '!'));
2692                        }
2693                        break;
2694
2695                case XC( OC_MOVE ):
2696                        debug_printf_eval("MOVE\n");
2697                        /* if source is a temporary string, jusk relink it to dest */
2698//Disabled: if R.v is numeric but happens to have cached R.v->string,
2699//then L.v ends up being a string, which is wrong
2700//                      if (R.v == v1+1 && R.v->string) {
2701//                              res = setvar_p(L.v, R.v->string);
2702//                              R.v->string = NULL;
2703//                      } else {
2704                                res = copyvar(L.v, R.v);
2705//                      }
2706                        break;
2707
2708                case XC( OC_TERNARY ):
2709                        if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2710                                syntax_error(EMSG_POSSIBLE_ERROR);
2711                        res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2712                        break;
2713
2714                case XC( OC_FUNC ): {
2715                        var *vbeg, *v;
2716                        const char *sv_progname;
2717
2718                        /* The body might be empty, still has to eval the args */
2719                        if (!op->r.n->info && !op->r.f->body.first)
2720                                syntax_error(EMSG_UNDEF_FUNC);
2721
2722                        vbeg = v = nvalloc(op->r.f->nargs + 1);
2723                        while (op1) {
2724                                var *arg = evaluate(nextarg(&op1), v1);
2725                                copyvar(v, arg);
2726                                v->type |= VF_CHILD;
2727                                v->x.parent = arg;
2728                                if (++v - vbeg >= op->r.f->nargs)
2729                                        break;
2730                        }
2731
2732                        v = fnargs;
2733                        fnargs = vbeg;
2734                        sv_progname = g_progname;
2735
2736                        res = evaluate(op->r.f->body.first, res);
2737
2738                        g_progname = sv_progname;
2739                        nvfree(fnargs);
2740                        fnargs = v;
2741
2742                        break;
2743                }
2744
2745                case XC( OC_GETLINE ):
2746                case XC( OC_PGETLINE ): {
2747                        rstream *rsm;
2748                        int i;
2749
2750                        if (op1) {
2751                                rsm = newfile(L.s);
2752                                if (!rsm->F) {
2753                                        if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2754                                                rsm->F = popen(L.s, "r");
2755                                                rsm->is_pipe = TRUE;
2756                                        } else {
2757                                                rsm->F = fopen_for_read(L.s);  /* not xfopen! */
2758                                        }
2759                                }
2760                        } else {
2761                                if (!iF)
2762                                        iF = next_input_file();
2763                                rsm = iF;
2764                        }
2765
2766                        if (!rsm || !rsm->F) {
2767                                setvar_i(intvar[ERRNO], errno);
2768                                setvar_i(res, -1);
2769                                break;
2770                        }
2771
2772                        if (!op->r.n)
2773                                R.v = intvar[F0];
2774
2775                        i = awk_getline(rsm, R.v);
2776                        if (i > 0 && !op1) {
2777                                incvar(intvar[FNR]);
2778                                incvar(intvar[NR]);
2779                        }
2780                        setvar_i(res, i);
2781                        break;
2782                }
2783
2784                /* simple builtins */
2785                case XC( OC_FBLTIN ): {
2786                        double R_d = R_d; /* for compiler */
2787
2788                        switch (opn) {
2789                        case F_in:
2790                                R_d = (long long)L_d;
2791                                break;
2792
2793                        case F_rn:
2794                                R_d = (double)rand() / (double)RAND_MAX;
2795                                break;
2796
2797                        case F_co:
2798                                if (ENABLE_FEATURE_AWK_LIBM) {
2799                                        R_d = cos(L_d);
2800                                        break;
2801                                }
2802
2803                        case F_ex:
2804                                if (ENABLE_FEATURE_AWK_LIBM) {
2805                                        R_d = exp(L_d);
2806                                        break;
2807                                }
2808
2809                        case F_lg:
2810                                if (ENABLE_FEATURE_AWK_LIBM) {
2811                                        R_d = log(L_d);
2812                                        break;
2813                                }
2814
2815                        case F_si:
2816                                if (ENABLE_FEATURE_AWK_LIBM) {
2817                                        R_d = sin(L_d);
2818                                        break;
2819                                }
2820
2821                        case F_sq:
2822                                if (ENABLE_FEATURE_AWK_LIBM) {
2823                                        R_d = sqrt(L_d);
2824                                        break;
2825                                }
2826
2827                                syntax_error(EMSG_NO_MATH);
2828                                break;
2829
2830                        case F_sr:
2831                                R_d = (double)seed;
2832                                seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2833                                srand(seed);
2834                                break;
2835
2836                        case F_ti:
2837                                R_d = time(NULL);
2838                                break;
2839
2840                        case F_le:
2841                                debug_printf_eval("length: L.s:'%s'\n", L.s);
2842                                if (!op1) {
2843                                        L.s = getvar_s(intvar[F0]);
2844                                        debug_printf_eval("length: L.s='%s'\n", L.s);
2845                                }
2846                                else if (L.v->type & VF_ARRAY) {
2847                                        R_d = L.v->x.array->nel;
2848                                        debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
2849                                        break;
2850                                }
2851                                R_d = strlen(L.s);
2852                                break;
2853
2854                        case F_sy:
2855                                fflush_all();
2856                                R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2857                                                ? (system(L.s) >> 8) : 0;
2858                                break;
2859
2860                        case F_ff:
2861                                if (!op1) {
2862                                        fflush(stdout);
2863                                } else if (L.s && *L.s) {
2864                                        rstream *rsm = newfile(L.s);
2865                                        fflush(rsm->F);
2866                                } else {
2867                                        fflush_all();
2868                                }
2869                                break;
2870
2871                        case F_cl: {
2872                                rstream *rsm;
2873                                int err = 0;
2874                                rsm = (rstream *)hash_search(fdhash, L.s);
2875                                debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2876                                if (rsm) {
2877                                        debug_printf_eval("OC_FBLTIN F_cl "
2878                                                "rsm->is_pipe:%d, ->F:%p\n",
2879                                                rsm->is_pipe, rsm->F);
2880                                        /* Can be NULL if open failed. Example:
2881                                         * getline line <"doesnt_exist";
2882                                         * close("doesnt_exist"); <--- here rsm->F is NULL
2883                                         */
2884                                        if (rsm->F)
2885                                                err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2886                                        free(rsm->buffer);
2887                                        hash_remove(fdhash, L.s);
2888                                }
2889                                if (err)
2890                                        setvar_i(intvar[ERRNO], errno);
2891                                R_d = (double)err;
2892                                break;
2893                        }
2894                        } /* switch */
2895                        setvar_i(res, R_d);
2896                        break;
2897                }
2898
2899                case XC( OC_BUILTIN ):
2900                        res = exec_builtin(op, res);
2901                        break;
2902
2903                case XC( OC_SPRINTF ):
2904                        setvar_p(res, awk_printf(op1));
2905                        break;
2906
2907                case XC( OC_UNARY ): {
2908                        double Ld, R_d;
2909
2910                        Ld = R_d = getvar_i(R.v);
2911                        switch (opn) {
2912                        case 'P':
2913                                Ld = ++R_d;
2914                                goto r_op_change;
2915                        case 'p':
2916                                R_d++;
2917                                goto r_op_change;
2918                        case 'M':
2919                                Ld = --R_d;
2920                                goto r_op_change;
2921                        case 'm':
2922                                R_d--;
2923 r_op_change:
2924                                setvar_i(R.v, R_d);
2925                                break;
2926                        case '!':
2927                                Ld = !istrue(R.v);
2928                                break;
2929                        case '-':
2930                                Ld = -R_d;
2931                                break;
2932                        }
2933                        setvar_i(res, Ld);
2934                        break;
2935                }
2936
2937                case XC( OC_FIELD ): {
2938                        int i = (int)getvar_i(R.v);
2939                        if (i == 0) {
2940                                res = intvar[F0];
2941                        } else {
2942                                split_f0();
2943                                if (i > nfields)
2944                                        fsrealloc(i);
2945                                res = &Fields[i - 1];
2946                        }
2947                        break;
2948                }
2949
2950                /* concatenation (" ") and index joining (",") */
2951                case XC( OC_CONCAT ):
2952                case XC( OC_COMMA ): {
2953                        const char *sep = "";
2954                        if ((opinfo & OPCLSMASK) == OC_COMMA)
2955                                sep = getvar_s(intvar[SUBSEP]);
2956                        setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2957                        break;
2958                }
2959
2960                case XC( OC_LAND ):
2961                        setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2962                        break;
2963
2964                case XC( OC_LOR ):
2965                        setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2966                        break;
2967
2968                case XC( OC_BINARY ):
2969                case XC( OC_REPLACE ): {
2970                        double R_d = getvar_i(R.v);
2971                        debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2972                        switch (opn) {
2973                        case '+':
2974                                L_d += R_d;
2975                                break;
2976                        case '-':
2977                                L_d -= R_d;
2978                                break;
2979                        case '*':
2980                                L_d *= R_d;
2981                                break;
2982                        case '/':
2983                                if (R_d == 0)
2984                                        syntax_error(EMSG_DIV_BY_ZERO);
2985                                L_d /= R_d;
2986                                break;
2987                        case '&':
2988                                if (ENABLE_FEATURE_AWK_LIBM)
2989                                        L_d = pow(L_d, R_d);
2990                                else
2991                                        syntax_error(EMSG_NO_MATH);
2992                                break;
2993                        case '%':
2994                                if (R_d == 0)
2995                                        syntax_error(EMSG_DIV_BY_ZERO);
2996                                L_d -= (long long)(L_d / R_d) * R_d;
2997                                break;
2998                        }
2999                        debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
3000                        res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
3001                        break;
3002                }
3003
3004                case XC( OC_COMPARE ): {
3005                        int i = i; /* for compiler */
3006                        double Ld;
3007
3008                        if (is_numeric(L.v) && is_numeric(R.v)) {
3009                                Ld = getvar_i(L.v) - getvar_i(R.v);
3010                        } else {
3011                                const char *l = getvar_s(L.v);
3012                                const char *r = getvar_s(R.v);
3013                                Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
3014                        }
3015                        switch (opn & 0xfe) {
3016                        case 0:
3017                                i = (Ld > 0);
3018                                break;
3019                        case 2:
3020                                i = (Ld >= 0);
3021                                break;
3022                        case 4:
3023                                i = (Ld == 0);
3024                                break;
3025                        }
3026                        setvar_i(res, (i == 0) ^ (opn & 1));
3027                        break;
3028                }
3029
3030                default:
3031                        syntax_error(EMSG_POSSIBLE_ERROR);
3032                }
3033                if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3034                        op = op->a.n;
3035                if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3036                        break;
3037                if (nextrec)
3038                        break;
3039        } /* while (op) */
3040
3041        nvfree(v1);
3042        debug_printf_eval("returning from %s(): %p\n", __func__, res);
3043        return res;
3044#undef fnargs
3045#undef seed
3046#undef sreg
3047}
3048
3049
3050/* -------- main & co. -------- */
3051
3052static int awk_exit(int r)
3053{
3054        var tv;
3055        unsigned i;
3056        hash_item *hi;
3057
3058        zero_out_var(&tv);
3059
3060        if (!exiting) {
3061                exiting = TRUE;
3062                nextrec = FALSE;
3063                evaluate(endseq.first, &tv);
3064        }
3065
3066        /* waiting for children */
3067        for (i = 0; i < fdhash->csize; i++) {
3068                hi = fdhash->items[i];
3069                while (hi) {
3070                        if (hi->data.rs.F && hi->data.rs.is_pipe)
3071                                pclose(hi->data.rs.F);
3072                        hi = hi->next;
3073                }
3074        }
3075
3076        exit(r);
3077}
3078
3079/* if expr looks like "var=value", perform assignment and return 1,
3080 * otherwise return 0 */
3081static int is_assignment(const char *expr)
3082{
3083        char *exprc, *val;
3084
3085        if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
3086                return FALSE;
3087        }
3088
3089        exprc = xstrdup(expr);
3090        val = exprc + (val - expr);
3091        *val++ = '\0';
3092
3093        unescape_string_in_place(val);
3094        setvar_u(newvar(exprc), val);
3095        free(exprc);
3096        return TRUE;
3097}
3098
3099/* switch to next input file */
3100static rstream *next_input_file(void)
3101{
3102#define rsm          (G.next_input_file__rsm)
3103#define files_happen (G.next_input_file__files_happen)
3104
3105        FILE *F;
3106        const char *fname, *ind;
3107
3108        if (rsm.F)
3109                fclose(rsm.F);
3110        rsm.F = NULL;
3111        rsm.pos = rsm.adv = 0;
3112
3113        for (;;) {
3114                if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3115                        if (files_happen)
3116                                return NULL;
3117                        fname = "-";
3118                        F = stdin;
3119                        break;
3120                }
3121                ind = getvar_s(incvar(intvar[ARGIND]));
3122                fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3123                if (fname && *fname && !is_assignment(fname)) {
3124                        F = xfopen_stdin(fname);
3125                        break;
3126                }
3127        }
3128
3129        files_happen = TRUE;
3130        setvar_s(intvar[FILENAME], fname);
3131        rsm.F = F;
3132        return &rsm;
3133#undef rsm
3134#undef files_happen
3135}
3136
3137int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3138int awk_main(int argc, char **argv)
3139{
3140        unsigned opt;
3141        char *opt_F;
3142        llist_t *list_v = NULL;
3143        llist_t *list_f = NULL;
3144#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3145        llist_t *list_e = NULL;
3146#endif
3147        int i, j;
3148        var *v;
3149        var tv;
3150        char **envp;
3151        char *vnames = (char *)vNames; /* cheat */
3152        char *vvalues = (char *)vValues;
3153
3154        INIT_G();
3155
3156        /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3157         * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3158        if (ENABLE_LOCALE_SUPPORT)
3159                setlocale(LC_NUMERIC, "C");
3160
3161        zero_out_var(&tv);
3162
3163        /* allocate global buffer */
3164        g_buf = xmalloc(MAXVARFMT + 1);
3165
3166        vhash = hash_init();
3167        ahash = hash_init();
3168        fdhash = hash_init();
3169        fnhash = hash_init();
3170
3171        /* initialize variables */
3172        for (i = 0; *vnames; i++) {
3173                intvar[i] = v = newvar(nextword(&vnames));
3174                if (*vvalues != '\377')
3175                        setvar_s(v, nextword(&vvalues));
3176                else
3177                        setvar_i(v, 0);
3178
3179                if (*vnames == '*') {
3180                        v->type |= VF_SPECIAL;
3181                        vnames++;
3182                }
3183        }
3184
3185        handle_special(intvar[FS]);
3186        handle_special(intvar[RS]);
3187
3188        newfile("/dev/stdin")->F = stdin;
3189        newfile("/dev/stdout")->F = stdout;
3190        newfile("/dev/stderr")->F = stderr;
3191
3192        /* Huh, people report that sometimes environ is NULL. Oh well. */
3193        if (environ) for (envp = environ; *envp; envp++) {
3194                /* environ is writable, thus we don't strdup it needlessly */
3195                char *s = *envp;
3196                char *s1 = strchr(s, '=');
3197                if (s1) {
3198                        *s1 = '\0';
3199                        /* Both findvar and setvar_u take const char*
3200                         * as 2nd arg -> environment is not trashed */
3201                        setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3202                        *s1 = '=';
3203                }
3204        }
3205        opt_complementary = OPTCOMPLSTR_AWK;
3206        opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3207        argv += optind;
3208        argc -= optind;
3209        if (opt & OPT_W)
3210                bb_error_msg("warning: option -W is ignored");
3211        if (opt & OPT_F) {
3212                unescape_string_in_place(opt_F);
3213                setvar_s(intvar[FS], opt_F);
3214        }
3215        while (list_v) {
3216                if (!is_assignment(llist_pop(&list_v)))
3217                        bb_show_usage();
3218        }
3219        while (list_f) {
3220                char *s = NULL;
3221                FILE *from_file;
3222
3223                g_progname = llist_pop(&list_f);
3224                from_file = xfopen_stdin(g_progname);
3225                /* one byte is reserved for some trick in next_token */
3226                for (i = j = 1; j > 0; i += j) {
3227                        s = xrealloc(s, i + 4096);
3228                        j = fread(s + i, 1, 4094, from_file);
3229                }
3230                s[i] = '\0';
3231                fclose(from_file);
3232                parse_program(s + 1);
3233                free(s);
3234        }
3235        g_progname = "cmd. line";
3236#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3237        while (list_e) {
3238                parse_program(llist_pop(&list_e));
3239        }
3240#endif
3241        if (!(opt & (OPT_f | OPT_e))) {
3242                if (!*argv)
3243                        bb_show_usage();
3244                parse_program(*argv++);
3245                argc--;
3246        }
3247
3248        /* fill in ARGV array */
3249        setvar_i(intvar[ARGC], argc + 1);
3250        setari_u(intvar[ARGV], 0, "awk");
3251        i = 0;
3252        while (*argv)
3253                setari_u(intvar[ARGV], ++i, *argv++);
3254
3255        evaluate(beginseq.first, &tv);
3256        if (!mainseq.first && !endseq.first)
3257                awk_exit(EXIT_SUCCESS);
3258
3259        /* input file could already be opened in BEGIN block */
3260        if (!iF)
3261                iF = next_input_file();
3262
3263        /* passing through input files */
3264        while (iF) {
3265                nextfile = FALSE;
3266                setvar_i(intvar[FNR], 0);
3267
3268                while ((i = awk_getline(iF, intvar[F0])) > 0) {
3269                        nextrec = FALSE;
3270                        incvar(intvar[NR]);
3271                        incvar(intvar[FNR]);
3272                        evaluate(mainseq.first, &tv);
3273
3274                        if (nextfile)
3275                                break;
3276                }
3277
3278                if (i < 0)
3279                        syntax_error(strerror(errno));
3280
3281                iF = next_input_file();
3282        }
3283
3284        awk_exit(EXIT_SUCCESS);
3285        /*return 0;*/
3286}
3287