busybox/editors/awk.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * awk implementation for busybox
   4 *
   5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
   6 *
   7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
   8 */
   9//config:config AWK
  10//config:       bool "awk (23 kb)"
  11//config:       default y
  12//config:       help
  13//config:       Awk is used as a pattern scanning and processing language.
  14//config:
  15//config:config FEATURE_AWK_LIBM
  16//config:       bool "Enable math functions (requires libm)"
  17//config:       default y
  18//config:       depends on AWK
  19//config:       help
  20//config:       Enable math functions of the Awk programming language.
  21//config:       NOTE: This requires libm to be present for linking.
  22//config:
  23//config:config FEATURE_AWK_GNU_EXTENSIONS
  24//config:       bool "Enable a few GNU extensions"
  25//config:       default y
  26//config:       depends on AWK
  27//config:       help
  28//config:       Enable a few features from gawk:
  29//config:       * command line option -e AWK_PROGRAM
  30//config:       * simultaneous use of -f and -e on the command line.
  31//config:       This enables the use of awk library files.
  32//config:       Example: awk -f mylib.awk -e '{print myfunction($1);}' ...
  33
  34//applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk))
  35
  36//kbuild:lib-$(CONFIG_AWK) += awk.o
  37
  38//usage:#define awk_trivial_usage
  39//usage:       "[OPTIONS] [AWK_PROGRAM] [FILE]..."
  40//usage:#define awk_full_usage "\n\n"
  41//usage:       "        -v VAR=VAL      Set variable"
  42//usage:     "\n        -F SEP          Use SEP as field separator"
  43//usage:     "\n        -f FILE         Read program from FILE"
  44//usage:        IF_FEATURE_AWK_GNU_EXTENSIONS(
  45//usage:     "\n        -e AWK_PROGRAM"
  46//usage:        )
  47
  48#include "libbb.h"
  49#include "xregex.h"
  50#include <math.h>
  51
  52/* This is a NOEXEC applet. Be very careful! */
  53
  54
  55/* If you comment out one of these below, it will be #defined later
  56 * to perform debug printfs to stderr: */
  57#define debug_printf_walker(...)  do {} while (0)
  58#define debug_printf_eval(...)  do {} while (0)
  59#define debug_printf_parse(...)  do {} while (0)
  60
  61#ifndef debug_printf_walker
  62# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
  63#endif
  64#ifndef debug_printf_eval
  65# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
  66#endif
  67#ifndef debug_printf_parse
  68# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
  69#endif
  70
  71
  72/* "+": stop on first non-option:
  73 * $ awk 'BEGIN { for(i=1; i<ARGC; ++i) { print i ": " ARGV[i] }}' -argz
  74 * 1: -argz
  75 */
  76#define OPTSTR_AWK "+" \
  77        "F:v:*f:*" \
  78        IF_FEATURE_AWK_GNU_EXTENSIONS("e:*") \
  79        "W:"
  80enum {
  81        OPTBIT_F,       /* define field separator */
  82        OPTBIT_v,       /* define variable */
  83        OPTBIT_f,       /* pull in awk program from file */
  84        IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,) /* -e AWK_PROGRAM */
  85        OPTBIT_W,       /* -W ignored */
  86        OPT_F = 1 << OPTBIT_F,
  87        OPT_v = 1 << OPTBIT_v,
  88        OPT_f = 1 << OPTBIT_f,
  89        OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
  90        OPT_W = 1 << OPTBIT_W
  91};
  92
  93#define MAXVARFMT       240
  94#define MINNVBLOCK      64
  95
  96/* variable flags */
  97#define VF_NUMBER       0x0001  /* 1 = primary type is number */
  98#define VF_ARRAY        0x0002  /* 1 = it's an array */
  99
 100#define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
 101#define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
 102#define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
 103#define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
 104#define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
 105#define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
 106#define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
 107
 108/* these flags are static, don't change them when value is changed */
 109#define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
 110
 111typedef struct walker_list {
 112        char *end;
 113        char *cur;
 114        struct walker_list *prev;
 115        char wbuf[1];
 116} walker_list;
 117
 118/* Variable */
 119typedef struct var_s {
 120        unsigned type;            /* flags */
 121        double number;
 122        char *string;
 123        union {
 124                int aidx;               /* func arg idx (for compilation stage) */
 125                struct xhash_s *array;  /* array ptr */
 126                struct var_s *parent;   /* for func args, ptr to actual parameter */
 127                walker_list *walker;    /* list of array elements (for..in) */
 128        } x;
 129} var;
 130
 131/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
 132typedef struct chain_s {
 133        struct node_s *first;
 134        struct node_s *last;
 135        const char *programname;
 136} chain;
 137
 138/* Function */
 139typedef struct func_s {
 140        unsigned nargs;
 141        struct chain_s body;
 142} func;
 143
 144/* I/O stream */
 145typedef struct rstream_s {
 146        FILE *F;
 147        char *buffer;
 148        int adv;
 149        int size;
 150        int pos;
 151        smallint is_pipe;
 152} rstream;
 153
 154typedef struct hash_item_s {
 155        union {
 156                struct var_s v;         /* variable/array hash */
 157                struct rstream_s rs;    /* redirect streams hash */
 158                struct func_s f;        /* functions hash */
 159        } data;
 160        struct hash_item_s *next;       /* next in chain */
 161        char name[1];                   /* really it's longer */
 162} hash_item;
 163
 164typedef struct xhash_s {
 165        unsigned nel;           /* num of elements */
 166        unsigned csize;         /* current hash size */
 167        unsigned nprime;        /* next hash size in PRIMES[] */
 168        unsigned glen;          /* summary length of item names */
 169        struct hash_item_s **items;
 170} xhash;
 171
 172/* Tree node */
 173typedef struct node_s {
 174        uint32_t info;
 175        unsigned lineno;
 176        union {
 177                struct node_s *n;
 178                var *v;
 179                int aidx;
 180                char *new_progname;
 181                regex_t *re;
 182        } l;
 183        union {
 184                struct node_s *n;
 185                regex_t *ire;
 186                func *f;
 187        } r;
 188        union {
 189                struct node_s *n;
 190        } a;
 191} node;
 192
 193/* Block of temporary variables */
 194typedef struct nvblock_s {
 195        int size;
 196        var *pos;
 197        struct nvblock_s *prev;
 198        struct nvblock_s *next;
 199        var nv[];
 200} nvblock;
 201
 202typedef struct tsplitter_s {
 203        node n;
 204        regex_t re[2];
 205} tsplitter;
 206
 207/* simple token classes */
 208/* Order and hex values are very important!!!  See next_token() */
 209#define TC_SEQSTART     (1 << 0)                /* ( */
 210#define TC_SEQTERM      (1 << 1)                /* ) */
 211#define TC_REGEXP       (1 << 2)                /* /.../ */
 212#define TC_OUTRDR       (1 << 3)                /* | > >> */
 213#define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
 214#define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
 215#define TC_BINOPX       (1 << 6)                /* two-opnd operator */
 216#define TC_IN           (1 << 7)
 217#define TC_COMMA        (1 << 8)
 218#define TC_PIPE         (1 << 9)                /* input redirection pipe */
 219#define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
 220#define TC_ARRTERM      (1 << 11)               /* ] */
 221#define TC_GRPSTART     (1 << 12)               /* { */
 222#define TC_GRPTERM      (1 << 13)               /* } */
 223#define TC_SEMICOL      (1 << 14)
 224#define TC_NEWLINE      (1 << 15)
 225#define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
 226#define TC_WHILE        (1 << 17)
 227#define TC_ELSE         (1 << 18)
 228#define TC_BUILTIN      (1 << 19)
 229/* This costs ~50 bytes of code.
 230 * A separate class to support deprecated "length" form. If we don't need that
 231 * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
 232 * can be merged with TC_BUILTIN:
 233 */
 234#define TC_LENGTH       (1 << 20)
 235#define TC_GETLINE      (1 << 21)
 236#define TC_FUNCDECL     (1 << 22)               /* 'function' 'func' */
 237#define TC_BEGIN        (1 << 23)
 238#define TC_END          (1 << 24)
 239#define TC_EOF          (1 << 25)
 240#define TC_VARIABLE     (1 << 26)
 241#define TC_ARRAY        (1 << 27)
 242#define TC_FUNCTION     (1 << 28)
 243#define TC_STRING       (1 << 29)
 244#define TC_NUMBER       (1 << 30)
 245
 246#define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
 247
 248/* combined token classes */
 249#define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
 250//#define       TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
 251#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
 252                   | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
 253                   | TC_SEQSTART | TC_STRING | TC_NUMBER)
 254
 255#define TC_STATEMNT (TC_STATX | TC_WHILE)
 256#define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
 257
 258/* word tokens, cannot mean something else if not expected */
 259#define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE \
 260                   | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
 261                   | TC_FUNCDECL | TC_BEGIN | TC_END)
 262
 263/* discard newlines after these */
 264#define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
 265                   | TC_BINOP | TC_OPTERM)
 266
 267/* what can expression begin with */
 268#define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
 269/* what can group begin with */
 270#define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
 271
 272/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
 273/* operator is inserted between them */
 274#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
 275                   | TC_STRING | TC_NUMBER | TC_UOPPOST)
 276#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
 277
 278#define OF_RES1     0x010000
 279#define OF_RES2     0x020000
 280#define OF_STR1     0x040000
 281#define OF_STR2     0x080000
 282#define OF_NUM1     0x100000
 283#define OF_CHECKED  0x200000
 284#define OF_REQUIRED 0x400000
 285
 286
 287/* combined operator flags */
 288#define xx      0
 289#define xV      OF_RES2
 290#define xS      (OF_RES2 | OF_STR2)
 291#define Vx      OF_RES1
 292#define Rx      (OF_RES1 | OF_NUM1 | OF_REQUIRED)
 293#define VV      (OF_RES1 | OF_RES2)
 294#define Nx      (OF_RES1 | OF_NUM1)
 295#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
 296#define Sx      (OF_RES1 | OF_STR1)
 297#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
 298#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
 299
 300#define OPCLSMASK 0xFF00
 301#define OPNMASK   0x007F
 302
 303/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 304 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 305 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 306 */
 307#undef P
 308#undef PRIMASK
 309#undef PRIMASK2
 310#define P(x)      (x << 24)
 311#define PRIMASK   0x7F000000
 312#define PRIMASK2  0x7E000000
 313
 314/* Operation classes */
 315
 316#define SHIFT_TIL_THIS  0x0600
 317#define RECUR_FROM_THIS 0x1000
 318
 319enum {
 320        OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
 321        OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
 322
 323        OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
 324        OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
 325        OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
 326
 327        OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
 328        OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
 329        OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
 330        OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
 331        OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
 332        OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
 333        OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
 334        OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
 335        OC_DONE = 0x2800,
 336
 337        ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
 338        ST_WHILE = 0x3300
 339};
 340
 341/* simple builtins */
 342enum {
 343        F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
 344        F_ti,   F_le,   F_sy,   F_ff,   F_cl
 345};
 346
 347/* builtins */
 348enum {
 349        B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
 350        B_ge,   B_gs,   B_su,
 351        B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
 352};
 353
 354/* tokens and their corresponding info values */
    /* Encoding of tokenlist[]: every token is stored as one length byte
     * (written as an octal escape, e.g. "\10" is 8 for "continue")
     * followed by the token text. NTC closes the current token class;
     * the classes are listed in the order of the TC_xxx bits, as the
     * per-line comments show. The entries of tokeninfo[] are laid out
     * in the same token order, so the two tables must be edited together.
     */
 355
 356#define NTC     "\377"  /* switch to next token class (tc<<1) */
 357#define NTCC    '\377'  /* the same marker as a character constant */
 358
 359static const char tokenlist[] ALIGN1 =
 360        "\1("         NTC                                   /* TC_SEQSTART */
 361        "\1)"         NTC                                   /* TC_SEQTERM */
 362        "\1/"         NTC                                   /* TC_REGEXP */
 363        "\2>>"        "\1>"         "\1|"       NTC         /* TC_OUTRDR */
 364        "\2++"        "\2--"        NTC                     /* TC_UOPPOST */
 365        "\2++"        "\2--"        "\1$"       NTC         /* TC_UOPPRE1 */
 366        "\2=="        "\1="         "\2+="      "\2-="      /* TC_BINOPX */
 367        "\2*="        "\2/="        "\2%="      "\2^="
 368        "\1+"         "\1-"         "\3**="     "\2**"
 369        "\1/"         "\1%"         "\1^"       "\1*"
 370        "\2!="        "\2>="        "\2<="      "\1>"
 371        "\1<"         "\2!~"        "\1~"       "\2&&"
 372        "\2||"        "\1?"         "\1:"       NTC
 373        "\2in"        NTC                                   /* TC_IN */
 374        "\1,"         NTC                                   /* TC_COMMA */
 375        "\1|"         NTC                                   /* TC_PIPE */
 376        "\1+"         "\1-"         "\1!"       NTC         /* TC_UOPPRE2 */
 377        "\1]"         NTC                                   /* TC_ARRTERM */
 378        "\1{"         NTC                                   /* TC_GRPSTART */
 379        "\1}"         NTC                                   /* TC_GRPTERM */
 380        "\1;"         NTC                                   /* TC_SEMICOL */
 381        "\1\n"        NTC                                   /* TC_NEWLINE */
 382        "\2if"        "\2do"        "\3for"     "\5break"   /* TC_STATX */
 383        "\10continue" "\6delete"    "\5print"
 384        "\6printf"    "\4next"      "\10nextfile"
 385        "\6return"    "\4exit"      NTC
 386        "\5while"     NTC                                   /* TC_WHILE */
 387        "\4else"      NTC                                   /* TC_ELSE */
 388        "\3and"       "\5compl"     "\6lshift"  "\2or"      /* TC_BUILTIN */
 389        "\6rshift"    "\3xor"
 390        "\5close"     "\6system"    "\6fflush"  "\5atan2"
 391        "\3cos"       "\3exp"       "\3int"     "\3log"
 392        "\4rand"      "\3sin"       "\4sqrt"    "\5srand"
 393        "\6gensub"    "\4gsub"      "\5index"   /* "\6length" was here */
 394        "\5match"     "\5split"     "\7sprintf" "\3sub"
 395        "\6substr"    "\7systime"   "\10strftime" "\6mktime"
 396        "\7tolower"   "\7toupper"   NTC
 397        "\6length"    NTC                                   /* TC_LENGTH */
 398        "\7getline"   NTC                                   /* TC_GETLINE */
 399        "\4func"      "\10function" NTC                     /* TC_FUNCDECL */
 400        "\5BEGIN"     NTC                                   /* TC_BEGIN */
 401        "\3END"                                             /* TC_END */
 402        /* compiler adds trailing "\0" */
 403        ;
 404
    /* Short alias, only to keep the table below readable */
 405#define OC_B  OC_BUILTIN
 406
    /* Info word for each token of tokenlist[], in the same order.
     * An entry ORs together an operation class (OC_xxx / ST_xxx), operand
     * flags (xV, NV, Sx, ...), P(priority) and a low-byte operation code.
     * For OC_B entries the P() byte is not a priority but the argument
     * descriptor "n n s3 s2 s1 v3 v2 v1" explained where P() is defined.
     * Note that "^=", "**=" and "^", "**" share the operation code '&'.
     */
 407static const uint32_t tokeninfo[] = {
 408        0,
 409        0,
 410        OC_REGEXP,
 411        xS|'a',                  xS|'w',                  xS|'|',
 412        OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
 413        OC_UNARY|xV|P(9)|'P',    OC_UNARY|xV|P(9)|'M',    OC_FIELD|xV|P(5),
 414        OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
 415        OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
 416        OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
 417        OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
 418        OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
 419        OC_COMPARE|VV|P(39)|2,   OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
 420        OC_LOR|Vx|P(59),         OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
 421        OC_IN|SV|P(49), /* TC_IN */
 422        OC_COMMA|SS|P(80),
 423        OC_PGETLINE|SV|P(37),
 424        OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
 425        0, /* ] */
 426        0,
 427        0,
 428        0,
 429        0, /* \n */
 430        ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
 431        OC_CONTINUE,  OC_DELETE|Rx, OC_PRINT,
 432        OC_PRINTF,    OC_NEXT,      OC_NEXTFILE,
 433        OC_RETURN|Vx, OC_EXIT|Nx,
 434        ST_WHILE,
 435        0, /* else */
 436        OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
 437        OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
 438        OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
 439        OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
 440        OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
 441        OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */
 442        OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
 443        OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
 444        OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
 445        OC_FBLTIN|Sx|F_le, /* TC_LENGTH */
 446        OC_GETLINE|SV|P(0),
 447        0,                 0,
 448        0,
 449        0 /* TC_END */
 450};
 451
 452/* internal variable names and their initial values       */
 453/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
    /* The enum below indexes intvar[]; its order must match the order
     * of the names packed into vNames[]. */
 454enum {
 455        CONVFMT,    OFMT,       FS,         OFS,
 456        ORS,        RS,         RT,         FILENAME,
 457        SUBSEP,     F0,         ARGIND,     ARGC,
 458        ARGV,       ERRNO,      FNR,        NR,
 459        NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
 460};
 461
    /* NUL-separated variable names. A '*' placed right after a name's NUL
     * marks that variable as SPECIAL; an empty name ends the list. */
 462static const char vNames[] ALIGN1 =
 463        "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
 464        "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
 465        "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
 466        "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
 467        "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
 468
    /* NUL-separated initial string values, in vNames[] order, given only
     * for the leading variables (CONVFMT .. F0).
     * NOTE(review): the closing "\377" presumably tells the initializer
     * that the remaining variables have no string value - confirm at
     * the place where this table is read. */
 469static const char vValues[] ALIGN1 =
 470        "%.6g\0"    "%.6g\0"    " \0"       " \0"
 471        "\n\0"      "\n\0"      "\0"        "\0"
 472        "\034\0"    "\0"        "\377";
 473
 474/* hash size may grow to these values */
 475#define FIRST_PRIME 61
 476static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
 477
 478
 479/* Globals. Split in two parts so that first one is addressed
 480 * with (mostly short) negative offsets.
 481 * NB: it's unsafe to put members of type "double"
 482 * into globals2 (gcc may fail to align them).
     * This first part is reached through the G1 macro, i.e. it is the
     * structure located immediately before ptr_to_globals.
 483 */
 484struct globals {
 485        double t_double;
 486        chain beginseq, mainseq, endseq;
 487        chain *seq;
 488        node *break_ptr, *continue_ptr;
 489        rstream *iF;
 490        xhash *vhash, *ahash, *fdhash, *fnhash;
 491        const char *g_progname;
 492        int g_lineno;
 493        int nfields;
 494        int maxfields; /* used in fsrealloc() only */
 495        var *Fields;
 496        nvblock *g_cb;
 497        char *g_pos;
 498        char *g_buf;
 499        smallint icase;
 500        smallint exiting;
 501        smallint nextrec;
 502        smallint nextfile;
 503        smallint is_f0_split;
 504        smallint t_rollback;
 505};
    /* Second part of the globals; ptr_to_globals points at it (see the
     * G macro). Members named func__var are former function-local statics
     * of func(). */
 506struct globals2 {
 507        uint32_t t_info; /* often used */
 508        uint32_t t_tclass;
 509        char *t_string;
 510        int t_lineno;
 511
 512        var *intvar[NUM_INTERNAL_VARS]; /* often used */
 513
 514        /* former statics from various functions */
 515        char *split_f0__fstrings;
 516
 517        uint32_t next_token__save_tclass;
 518        uint32_t next_token__save_info;
 519        uint32_t next_token__ltclass;
 520        smallint next_token__concat_inserted;
 521
 522        smallint next_input_file__files_happen;
 523        rstream next_input_file__rsm;
 524
 525        var *evaluate__fnargs;
 526        unsigned evaluate__seed;
 527        regex_t evaluate__sreg;
 528
 529        var ptest__v;
 530
 531        tsplitter exec_builtin__tspl;
 532
 533        /* biggest and least used members go last */
 534        tsplitter fsplitter, rsplitter;
 535};
    /* Accessors for the two-part globals.
     * INIT_G() allocates both structures as one zeroed block and points
     * ptr_to_globals just past the first one, so G1 (struct globals) is
     * ptr_to_globals[-1] and G (struct globals2) starts at ptr_to_globals.
     */
 536#define G1 (ptr_to_globals[-1])
 537#define G (*(struct globals2 *)ptr_to_globals)
 538/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
 539/*char G1size[sizeof(G1)]; - 0x74 */
 540/*char Gsize[sizeof(G)]; - 0x1c4 */
 541/* Trying to keep most of members accessible with short offsets: */
 542/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
    /* Shorthands: let the code use the members as if they were plain globals */
 543#define t_double     (G1.t_double    )
 544#define beginseq     (G1.beginseq    )
 545#define mainseq      (G1.mainseq     )
 546#define endseq       (G1.endseq      )
 547#define seq          (G1.seq         )
 548#define break_ptr    (G1.break_ptr   )
 549#define continue_ptr (G1.continue_ptr)
 550#define iF           (G1.iF          )
 551#define vhash        (G1.vhash       )
 552#define ahash        (G1.ahash       )
 553#define fdhash       (G1.fdhash      )
 554#define fnhash       (G1.fnhash      )
 555#define g_progname   (G1.g_progname  )
 556#define g_lineno     (G1.g_lineno    )
 557#define nfields      (G1.nfields     )
 558#define maxfields    (G1.maxfields   )
 559#define Fields       (G1.Fields      )
 560#define g_cb         (G1.g_cb        )
 561#define g_pos        (G1.g_pos       )
 562#define g_buf        (G1.g_buf       )
 563#define icase        (G1.icase       )
 564#define exiting      (G1.exiting     )
 565#define nextrec      (G1.nextrec     )
 566#define nextfile     (G1.nextfile    )
 567#define is_f0_split  (G1.is_f0_split )
 568#define t_rollback   (G1.t_rollback  )
 569#define t_info       (G.t_info      )
 570#define t_tclass     (G.t_tclass    )
 571#define t_string     (G.t_string    )
 572#define t_lineno     (G.t_lineno    )
 573#define intvar       (G.intvar      )
 574#define fsplitter    (G.fsplitter   )
 575#define rsplitter    (G.rsplitter   )
    /* Allocate the globals and set the two members whose initial value
     * is not zero: the "last token class" starts as TC_OPTERM and the
     * seed starts as 1. */
 576#define INIT_G() do { \
 577        SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
 578        G.next_token__ltclass = TC_OPTERM; \
 579        G.evaluate__seed = 1; \
 580} while (0)
 581
 582
 583/* function prototypes: forward declarations for functions which are
     * called before their definition, e.g. handle_special() in setvar_p() */
 584static void handle_special(var *);
 585static node *parse_expr(uint32_t);
 586static void chain_group(void);
 587static var *evaluate(node *, var *);
 588static rstream *next_input_file(void);
 589static int fmt_num(char *, int, const char *, double, int);
 590static int awk_exit(int) NORETURN; /* does not return to the caller */
 591
 592/* ---- error handling ---- */
 593
    /* Message texts, kept as named constants; syntax_error() takes such a
     * text as its argument and prints it after "progname:lineno: ". */
 594static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
 595static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
 596static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
 597static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
 598static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
 599static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments";
 600static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
 601static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
 602static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
 603static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
 604static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field";
 605
 606static void zero_out_var(var *vp)
 607{
 608        memset(vp, 0, sizeof(*vp));
 609}
 610
 611static void syntax_error(const char *message) NORETURN;
 612static void syntax_error(const char *message)
 613{
 614        bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
 615}
 616
 617/* ---- hash stuff ---- */
 618
 619static unsigned hashidx(const char *name)
 620{
 621        unsigned idx = 0;
 622
 623        while (*name)
 624                idx = *name++ + (idx << 6) - idx;
 625        return idx;
 626}
 627
 628/* create new hash */
 629static xhash *hash_init(void)
 630{
 631        xhash *newhash;
 632
 633        newhash = xzalloc(sizeof(*newhash));
 634        newhash->csize = FIRST_PRIME;
 635        newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
 636
 637        return newhash;
 638}
 639
 640/* find item in hash, return ptr to data, NULL if not found */
 641static void *hash_search(xhash *hash, const char *name)
 642{
 643        hash_item *hi;
 644
 645        hi = hash->items[hashidx(name) % hash->csize];
 646        while (hi) {
 647                if (strcmp(hi->name, name) == 0)
 648                        return &hi->data;
 649                hi = hi->next;
 650        }
 651        return NULL;
 652}
 653
 654/* grow hash if it becomes too big */
 655static void hash_rebuild(xhash *hash)
 656{
 657        unsigned newsize, i, idx;
 658        hash_item **newitems, *hi, *thi;
 659
 660        if (hash->nprime == ARRAY_SIZE(PRIMES))
 661                return;
 662
 663        newsize = PRIMES[hash->nprime++];
 664        newitems = xzalloc(newsize * sizeof(newitems[0]));
 665
 666        for (i = 0; i < hash->csize; i++) {
 667                hi = hash->items[i];
 668                while (hi) {
 669                        thi = hi;
 670                        hi = thi->next;
 671                        idx = hashidx(thi->name) % newsize;
 672                        thi->next = newitems[idx];
 673                        newitems[idx] = thi;
 674                }
 675        }
 676
 677        free(hash->items);
 678        hash->csize = newsize;
 679        hash->items = newitems;
 680}
 681
 682/* find item in hash, add it if necessary. Return ptr to data */
 683static void *hash_find(xhash *hash, const char *name)
 684{
 685        hash_item *hi;
 686        unsigned idx;
 687        int l;
 688
 689        hi = hash_search(hash, name);
 690        if (!hi) {
 691                if (++hash->nel / hash->csize > 10)
 692                        hash_rebuild(hash);
 693
 694                l = strlen(name) + 1;
 695                hi = xzalloc(sizeof(*hi) + l);
 696                strcpy(hi->name, name);
 697
 698                idx = hashidx(name) % hash->csize;
 699                hi->next = hash->items[idx];
 700                hash->items[idx] = hi;
 701                hash->glen += l;
 702        }
 703        return &hi->data;
 704}
 705
 706#define findvar(hash, name) ((var*)    hash_find((hash), (name)))
 707#define newvar(name)        ((var*)    hash_find(vhash, (name)))
 708#define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
 709#define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
 710
 711static void hash_remove(xhash *hash, const char *name)
 712{
 713        hash_item *hi, **phi;
 714
 715        phi = &hash->items[hashidx(name) % hash->csize];
 716        while (*phi) {
 717                hi = *phi;
 718                if (strcmp(hi->name, name) == 0) {
 719                        hash->glen -= (strlen(name) + 1);
 720                        hash->nel--;
 721                        *phi = hi->next;
 722                        free(hi);
 723                        break;
 724                }
 725                phi = &hi->next;
 726        }
 727}
 728
 729/* ------ some useful functions ------ */
 730
 731static char *skip_spaces(char *p)
 732{
 733        while (1) {
 734                if (*p == '\\' && p[1] == '\n') {
 735                        p++;
 736                        t_lineno++;
 737                } else if (*p != ' ' && *p != '\t') {
 738                        break;
 739                }
 740                p++;
 741        }
 742        return p;
 743}
 744
 745/* returns old *s, advances *s past word and terminating NUL */
 746static char *nextword(char **s)
 747{
 748        char *p = *s;
 749        while (*(*s)++ != '\0')
 750                continue;
 751        return p;
 752}
 753
 754static char nextchar(char **s)
 755{
 756        char c, *pps;
 757
 758        c = *(*s)++;
 759        pps = *s;
 760        if (c == '\\')
 761                c = bb_process_escape_sequence((const char**)s);
 762        /* Example awk statement:
 763         * s = "abc\"def"
 764         * we must treat \" as "
 765         */
 766        if (c == '\\' && *s == pps) { /* unrecognized \z? */
 767                c = *(*s); /* yes, fetch z */
 768                if (c)
 769                        (*s)++; /* advance unless z = NUL */
 770        }
 771        return c;
 772}
 773
 774/* TODO: merge with strcpy_and_process_escape_sequences()?
 775 */
 776static void unescape_string_in_place(char *s1)
 777{
 778        char *s = s1;
 779        while ((*s1 = nextchar(&s)) != '\0')
 780                s1++;
 781}
 782
 783static ALWAYS_INLINE int isalnum_(int c)
 784{
 785        return (isalnum(c) || c == '_');
 786}
 787
 788static double my_strtod(char **pp)
 789{
 790        char *cp = *pp;
 791        if (ENABLE_DESKTOP && cp[0] == '0') {
 792                /* Might be hex or octal integer: 0x123abc or 07777 */
 793                char c = (cp[1] | 0x20);
 794                if (c == 'x' || isdigit(cp[1])) {
 795                        unsigned long long ull = strtoull(cp, pp, 0);
 796                        if (c == 'x')
 797                                return ull;
 798                        c = **pp;
 799                        if (!isdigit(c) && c != '.')
 800                                return ull;
 801                        /* else: it may be a floating number. Examples:
 802                         * 009.123 (*pp points to '9')
 803                         * 000.123 (*pp points to '.')
 804                         * fall through to strtod.
 805                         */
 806                }
 807        }
 808        return strtod(cp, pp);
 809}
 810
 811/* -------- working with variables (set/get/copy/etc) -------- */
 812
 813static xhash *iamarray(var *v)
 814{
 815        var *a = v;
 816
 817        while (a->type & VF_CHILD)
 818                a = a->x.parent;
 819
 820        if (!(a->type & VF_ARRAY)) {
 821                a->type |= VF_ARRAY;
 822                a->x.array = hash_init();
 823        }
 824        return a->x.array;
 825}
 826
 827static void clear_array(xhash *array)
 828{
 829        unsigned i;
 830        hash_item *hi, *thi;
 831
 832        for (i = 0; i < array->csize; i++) {
 833                hi = array->items[i];
 834                while (hi) {
 835                        thi = hi;
 836                        hi = hi->next;
 837                        free(thi->data.v.string);
 838                        free(thi);
 839                }
 840                array->items[i] = NULL;
 841        }
 842        array->glen = array->nel = 0;
 843}
 844
 845/* clear a variable */
 846static var *clrvar(var *v)
 847{
 848        if (!(v->type & VF_FSTR))
 849                free(v->string);
 850
 851        v->type &= VF_DONTTOUCH;
 852        v->type |= VF_DIRTY;
 853        v->string = NULL;
 854        return v;
 855}
 856
 857/* assign string value to variable */
 858static var *setvar_p(var *v, char *value)
 859{
 860        clrvar(v);
 861        v->string = value;
 862        handle_special(v);
 863        return v;
 864}
 865
 866/* same as setvar_p but make a copy of string */
 867static var *setvar_s(var *v, const char *value)
 868{
 869        return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
 870}
 871
 872/* same as setvar_s but sets USER flag */
 873static var *setvar_u(var *v, const char *value)
 874{
 875        v = setvar_s(v, value);
 876        v->type |= VF_USER;
 877        return v;
 878}
 879
 880/* set array element to user string */
 881static void setari_u(var *a, int idx, const char *s)
 882{
 883        var *v;
 884
 885        v = findvar(iamarray(a), itoa(idx));
 886        setvar_u(v, s);
 887}
 888
 889/* assign numeric value to variable */
 890static var *setvar_i(var *v, double value)
 891{
 892        clrvar(v);
 893        v->type |= VF_NUMBER;
 894        v->number = value;
 895        handle_special(v);
 896        return v;
 897}
 898
 899static const char *getvar_s(var *v)
 900{
 901        /* if v is numeric and has no cached string, convert it to string */
 902        if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
 903                fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
 904                v->string = xstrdup(g_buf);
 905                v->type |= VF_CACHED;
 906        }
 907        return (v->string == NULL) ? "" : v->string;
 908}
 909
 910static double getvar_i(var *v)
 911{
 912        char *s;
 913
 914        if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
 915                v->number = 0;
 916                s = v->string;
 917                if (s && *s) {
 918                        debug_printf_eval("getvar_i: '%s'->", s);
 919                        v->number = my_strtod(&s);
 920                        debug_printf_eval("%f (s:'%s')\n", v->number, s);
 921                        if (v->type & VF_USER) {
 922                                s = skip_spaces(s);
 923                                if (*s != '\0')
 924                                        v->type &= ~VF_USER;
 925                        }
 926                } else {
 927                        debug_printf_eval("getvar_i: '%s'->zero\n", s);
 928                        v->type &= ~VF_USER;
 929                }
 930                v->type |= VF_CACHED;
 931        }
 932        debug_printf_eval("getvar_i: %f\n", v->number);
 933        return v->number;
 934}
 935
 936/* Used for operands of bitwise ops */
 937static unsigned long getvar_i_int(var *v)
 938{
 939        double d = getvar_i(v);
 940
 941        /* Casting doubles to longs is undefined for values outside
 942         * of target type range. Try to widen it as much as possible */
 943        if (d >= 0)
 944                return (unsigned long)d;
 945        /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
 946        return - (long) (unsigned long) (-d);
 947}
 948
 949static var *copyvar(var *dest, const var *src)
 950{
 951        if (dest != src) {
 952                clrvar(dest);
 953                dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
 954                debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
 955                dest->number = src->number;
 956                if (src->string)
 957                        dest->string = xstrdup(src->string);
 958        }
 959        handle_special(dest);
 960        return dest;
 961}
 962
 963static var *incvar(var *v)
 964{
 965        return setvar_i(v, getvar_i(v) + 1.0);
 966}
 967
 968/* return true if v is number or numeric string */
 969static int is_numeric(var *v)
 970{
 971        getvar_i(v);
 972        return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
 973}
 974
 975/* return 1 when value of v corresponds to true, 0 otherwise */
 976static int istrue(var *v)
 977{
 978        if (is_numeric(v))
 979                return (v->number != 0);
 980        return (v->string && v->string[0]);
 981}
 982
 983/* temporary variables allocator. Last allocated should be first freed */
 984static var *nvalloc(int n)
 985{
 986        nvblock *pb = NULL;
 987        var *v, *r;
 988        int size;
 989
 990        while (g_cb) {
 991                pb = g_cb;
 992                if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
 993                        break;
 994                g_cb = g_cb->next;
 995        }
 996
 997        if (!g_cb) {
 998                size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
 999                g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
1000                g_cb->size = size;
1001                g_cb->pos = g_cb->nv;
1002                g_cb->prev = pb;
1003                /*g_cb->next = NULL; - xzalloc did it */
1004                if (pb)
1005                        pb->next = g_cb;
1006        }
1007
1008        v = r = g_cb->pos;
1009        g_cb->pos += n;
1010
1011        while (v < g_cb->pos) {
1012                v->type = 0;
1013                v->string = NULL;
1014                v++;
1015        }
1016
1017        return r;
1018}
1019
1020static void nvfree(var *v)
1021{
1022        var *p;
1023
1024        if (v < g_cb->nv || v >= g_cb->pos)
1025                syntax_error(EMSG_INTERNAL_ERROR);
1026
1027        for (p = v; p < g_cb->pos; p++) {
1028                if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1029                        clear_array(iamarray(p));
1030                        free(p->x.array->items);
1031                        free(p->x.array);
1032                }
1033                if (p->type & VF_WALK) {
1034                        walker_list *n;
1035                        walker_list *w = p->x.walker;
1036                        debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1037                        p->x.walker = NULL;
1038                        while (w) {
1039                                n = w->prev;
1040                                debug_printf_walker(" free(%p)\n", w);
1041                                free(w);
1042                                w = n;
1043                        }
1044                }
1045                clrvar(p);
1046        }
1047
1048        g_cb->pos = v;
1049        while (g_cb->prev && g_cb->pos == g_cb->nv) {
1050                g_cb = g_cb->prev;
1051        }
1052}
1053
1054/* ------- awk program text parsing ------- */
1055
/* Parse next token pointed by global g_pos, place results into the t_*
 * globals (t_tclass, t_info, t_string, t_double).
 * If token isn't expected, give away. Return token class.
 *
 * expected: bitmask of TC_* classes acceptable at this point. A token
 * whose class is not in the mask ends in syntax_error().
 *
 * State kept between calls:
 *  t_rollback      - set by rollback_token(): deliver the previous token again
 *  concat_inserted - a concatenation operator was returned in place of the
 *                    token just scanned; that token (save_tclass, save_info)
 *                    is delivered by the following call
 *  ltclass         - class of the last token returned
 *
 * The program text is modified in place: string, regexp and name tokens
 * are NUL-terminated inside the buffer and t_string points into it.
 */
static uint32_t next_token(uint32_t expected)
{
#define concat_inserted (G.next_token__concat_inserted)
#define save_tclass     (G.next_token__save_tclass)
#define save_info       (G.next_token__save_info)
/* Initialized to TC_OPTERM: */
#define ltclass         (G.next_token__ltclass)

        char *p, *s;
        const char *tl;
        uint32_t tc;
        const uint32_t *ti;

        if (t_rollback) {
                /* previous token is still in the t_* globals, reuse it */
                t_rollback = FALSE;
        } else if (concat_inserted) {
                /* deliver the token that was held back last time */
                concat_inserted = FALSE;
                t_tclass = save_tclass;
                t_info = save_info;
        } else {
                p = g_pos;
 readnext:
                p = skip_spaces(p);
                g_lineno = t_lineno;
                /* a comment runs up to, but not including, the newline */
                if (*p == '#')
                        while (*p != '\n' && *p != '\0')
                                p++;

                /* only counted here; the newline itself is matched as a token below */
                if (*p == '\n')
                        t_lineno++;

                if (*p == '\0') {
                        tc = TC_EOF;
                        debug_printf_parse("%s: token found: TC_EOF\n", __func__);
                } else if (*p == '\"') {
                        /* it's a string */
                        /* unescaped in place: s (write) never gets ahead of p (read) */
                        t_string = s = ++p;
                        while (*p != '\"') {
                                char *pp;
                                if (*p == '\0' || *p == '\n')
                                        syntax_error(EMSG_UNEXP_EOS);
                                pp = p;
                                *s++ = nextchar(&pp);
                                p = pp;
                        }
                        p++;
                        *s = '\0';
                        tc = TC_STRING;
                        debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
                } else if ((expected & TC_REGEXP) && *p == '/') {
                        /* it's regexp */
                        /* '/' starts a regexp only where the caller allows one */
                        t_string = s = ++p;
                        while (*p != '/') {
                                if (*p == '\0' || *p == '\n')
                                        syntax_error(EMSG_UNEXP_EOS);
                                *s = *p++;
                                if (*s++ == '\\') {
                                        char *pp = p;
                                        /* overwrite the copied backslash with the decoded char */
                                        s[-1] = bb_process_escape_sequence((const char **)&pp);
                                        /* "\\" stays as two backslashes in the output */
                                        if (*p == '\\')
                                                *s++ = '\\';
                                        /* nothing consumed by the decoder: copy next char as is */
                                        if (pp == p)
                                                *s++ = *p++;
                                        else
                                                p = pp;
                                }
                        }
                        p++;
                        *s = '\0';
                        tc = TC_REGEXP;
                        debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);

                } else if (*p == '.' || isdigit(*p)) {
                        /* it's a number */
                        char *pp = p;
                        t_double = my_strtod(&pp);
                        p = pp;
                        /* a '.' right after the number (e.g. "1.2.3") is rejected */
                        if (*p == '.')
                                syntax_error(EMSG_UNEXP_TOKEN);
                        tc = TC_NUMBER;
                        debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
                } else {
                        /* search for something known */
                        /* tokenlist holds length-prefixed token texts; an NTCC byte
                         * moves on to the next token class (tc is shifted left).
                         * tokeninfo is walked in step, one entry per token text.
                         */
                        tl = tokenlist;
                        tc = 0x00000001;
                        ti = tokeninfo;
                        while (*tl) {
                                int l = (unsigned char) *tl++;
                                if (l == (unsigned char) NTCC) {
                                        tc <<= 1;
                                        continue;
                                }
                                /* if token class is expected,
                                 * token matches,
                                 * and it's not a longer word,
                                 */
                                if ((tc & (expected | TC_WORD | TC_NEWLINE))
                                 && strncmp(p, tl, l) == 0
                                 && !((tc & TC_WORD) && isalnum_(p[l]))
                                ) {
                                        /* then this is what we are looking for */
                                        t_info = *ti;
                                        debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
                                        p += l;
                                        goto token_found;
                                }
                                ti++;
                                tl += l;
                        }
                        /* not a known token */

                        /* is it a name? (var/array/function) */
                        if (!isalnum_(*p))
                                syntax_error(EMSG_UNEXP_TOKEN); /* no */
                        /* yes */
                        /* The name is moved one byte to the left so that its
                         * terminating NUL fits without overwriting the character
                         * that follows the name.
                         * NOTE(review): t_string therefore starts one byte before
                         * the name; if the name is the very first byte of the text
                         * this writes before it - confirm callers provide a
                         * writable byte there.
                         */
                        t_string = --p;
                        while (isalnum_(*++p)) {
                                p[-1] = *p;
                        }
                        p[-1] = '\0';
                        tc = TC_VARIABLE;
                        /* also consume whitespace between functionname and bracket */
                        if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
                                p = skip_spaces(p);
                        if (*p == '(') {
                                /* the '(' is left in place, not consumed here */
                                tc = TC_FUNCTION;
                                debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
                        } else {
                                if (*p == '[') {
                                        /* the '[' is consumed as part of the token */
                                        p++;
                                        tc = TC_ARRAY;
                                        debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
                                } else
                                        debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
                        }
                }
 token_found:
                g_pos = p;

                /* skipping newlines in some cases */
                if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
                        goto readnext;

                /* insert concatenation operator when needed */
                if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
                        /* return the operator now, the scanned token on the next call */
                        concat_inserted = TRUE;
                        save_tclass = tc;
                        save_info = t_info;
                        tc = TC_BINOP;
                        t_info = OC_CONCAT | SS | P(35);
                }

                t_tclass = tc;
        }
        ltclass = t_tclass;

        /* Are we ready for this? */
        if (!(ltclass & expected)) {
                syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
                                EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
        }

        return ltclass;
#undef concat_inserted
#undef save_tclass
#undef save_info
#undef ltclass
}
1227
/* Push the current token back: with t_rollback set, the next call to
 * next_token() skips scanning and delivers the same token again.
 */
static void rollback_token(void)
{
        t_rollback = TRUE;
}
1232
1233static node *new_node(uint32_t info)
1234{
1235        node *n;
1236
1237        n = xzalloc(sizeof(node));
1238        n->info = info;
1239        n->lineno = g_lineno;
1240        return n;
1241}
1242
1243static void mk_re_node(const char *s, node *n, regex_t *re)
1244{
1245        n->info = OC_REGEXP;
1246        n->l.re = re;
1247        n->r.ire = re + 1;
1248        xregcomp(re, s, REG_EXTENDED);
1249        xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1250}
1251
1252static node *condition(void)
1253{
1254        next_token(TC_SEQSTART);
1255        return parse_expr(TC_SEQTERM);
1256}
1257
/* parse expression terminated by given argument, return ptr
 * to built subtree. Terminator is eaten by parse_expr.
 *
 * iexp: bitmask of token classes that terminate the expression.
 * Returns the root of the built subtree (sn.r.n). This is NULL when the
 * terminator is the first token read, i.e. the expression is empty.
 *
 * Links as used below: l.n and r.n are a node's operand subtrees, a.n
 * points back at the node it hangs off. xtc is the set of token classes
 * accepted for the next token; cn is the node added most recently.
 */
static node *parse_expr(uint32_t iexp)
{
        node sn;
        node *cn = &sn;
        node *vn, *glptr;
        uint32_t tc, xtc;
        var *v;

        debug_printf_parse("%s(%x)\n", __func__, iexp);

        /* sn is a dummy root on the stack; the real tree hangs off sn.r.n.
         * Presumably info = PRIMASK keeps any operator from climbing above it.
         */
        sn.info = PRIMASK;
        sn.r.n = sn.a.n = glptr = NULL;
        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;

        while (!((tc = next_token(xtc)) & iexp)) {

                /* glptr is non-NULL only right after a getline operand */
                if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
                        /* input redirection (<) attached to glptr node */
                        debug_printf_parse("%s: input redir\n", __func__);
                        cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
                        cn->a.n = glptr;
                        xtc = TC_OPERAND | TC_UOPPRE;
                        glptr = NULL;

                } else if (tc & (TC_BINOP | TC_UOPPOST)) {
                        debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
                        /* for binary and postfix-unary operators, jump back over
                         * previous operators with higher priority */
                        vn = cn;
                        while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
                            || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
                        ) {
                                vn = vn->a.n;
                                /* climbed up to the dummy root: nothing to attach to */
                                if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
                        }
                        if ((t_info & OPCLSMASK) == OC_TERNARY)
                                t_info += P(6);
                        /* the new operator takes vn's place as right child of vn's
                         * parent; vn becomes its operand below */
                        cn = vn->a.n->r.n = new_node(t_info);
                        cn->a.n = vn->a.n;
                        if (tc & TC_BINOP) {
                                /* binary: vn is the left operand, the right one follows */
                                cn->l.n = vn;
                                xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
                                if ((t_info & OPCLSMASK) == OC_PGETLINE) {
                                        /* it's a pipe */
                                        next_token(TC_GETLINE);
                                        /* give maximum priority to this pipe */
                                        cn->info &= ~PRIMASK;
                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
                                }
                        } else {
                                /* postfix unary: vn is the only operand */
                                cn->r.n = vn;
                                xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
                        }
                        vn->a.n = cn;

                } else {
                        debug_printf_parse("%s: other\n", __func__);
                        /* for operands and prefix-unary operators, attach them
                         * to last node */
                        vn = cn;
                        cn = vn->r.n = new_node(t_info);
                        cn->a.n = vn;
                        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
                        if (tc & (TC_OPERAND | TC_REGEXP)) {
                                debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
                                xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
                                /* one should be very careful with switch on tclass -
                                 * only simple tclasses should be used! */
                                switch (tc) {
                                case TC_VARIABLE:
                                case TC_ARRAY:
                                        debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
                                        cn->info = OC_VAR;
                                        /* a name found in ahash is referenced by its
                                         * index (OC_FNARG); any other name goes through
                                         * newvar() */
                                        v = hash_search(ahash, t_string);
                                        if (v != NULL) {
                                                cn->info = OC_FNARG;
                                                cn->l.aidx = v->x.aidx;
                                        } else {
                                                cn->l.v = newvar(t_string);
                                        }
                                        if (tc & TC_ARRAY) {
                                                /* subscript expression, up to TC_ARRTERM */
                                                cn->info |= xS;
                                                cn->r.n = parse_expr(TC_ARRTERM);
                                        }
                                        break;

                                case TC_NUMBER:
                                case TC_STRING:
                                        debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
                                        /* a literal becomes an OC_VAR node over a freshly
                                         * allocated var holding the value */
                                        cn->info = OC_VAR;
                                        v = cn->l.v = xzalloc(sizeof(var));
                                        if (tc & TC_NUMBER)
                                                setvar_i(v, t_double);
                                        else
                                                setvar_s(v, t_string);
                                        break;

                                case TC_REGEXP:
                                        debug_printf_parse("%s: TC_REGEXP\n", __func__);
                                        /* two regex_t: mk_re_node also compiles an
                                         * ignore-case variant */
                                        mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
                                        break;

                                case TC_FUNCTION:
                                        debug_printf_parse("%s: TC_FUNCTION\n", __func__);
                                        cn->info = OC_FUNC;
                                        cn->r.f = newfunc(t_string);
                                        /* the parenthesized argument list */
                                        cn->l.n = condition();
                                        break;

                                case TC_SEQSTART:
                                        debug_printf_parse("%s: TC_SEQSTART\n", __func__);
                                        /* the subtree of the parenthesized expression
                                         * replaces the node created above */
                                        cn = vn->r.n = parse_expr(TC_SEQTERM);
                                        if (!cn)
                                                syntax_error("Empty sequence");
                                        cn->a.n = vn;
                                        break;

                                case TC_GETLINE:
                                        debug_printf_parse("%s: TC_GETLINE\n", __func__);
                                        /* remembered so that a following '<' can be
                                         * taken as input redirection (top of loop) */
                                        glptr = cn;
                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
                                        break;

                                case TC_BUILTIN:
                                        debug_printf_parse("%s: TC_BUILTIN\n", __func__);
                                        cn->l.n = condition();
                                        break;

                                case TC_LENGTH:
                                        debug_printf_parse("%s: TC_LENGTH\n", __func__);
                                        /* peek at the next token: an argument list
                                         * after "length" is optional */
                                        next_token(TC_SEQSTART | TC_OPTERM | TC_GRPTERM);
                                        rollback_token();
                                        if (t_tclass & TC_SEQSTART) {
                                                /* It was a "(" token. Handle just like TC_BUILTIN */
                                                cn->l.n = condition();
                                        }
                                        break;
                                }
                        }
                }
        }

        debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
        return sn.r.n;
}
1405
1406/* add node to chain. Return ptr to alloc'd node */
1407static node *chain_node(uint32_t info)
1408{
1409        node *n;
1410
1411        if (!seq->first)
1412                seq->first = seq->last = new_node(0);
1413
1414        if (seq->programname != g_progname) {
1415                seq->programname = g_progname;
1416                n = chain_node(OC_NEWSOURCE);
1417                n->l.new_progname = xstrdup(g_progname);
1418        }
1419
1420        n = seq->last;
1421        n->info = info;
1422        seq->last = n->a.n = new_node(OC_DONE);
1423
1424        return n;
1425}
1426
1427static void chain_expr(uint32_t info)
1428{
1429        node *n;
1430
1431        n = chain_node(info);
1432
1433        n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1434        if ((info & OF_REQUIRED) && !n->l.n)
1435                syntax_error(EMSG_TOO_FEW_ARGS);
1436
1437        if (t_tclass & TC_GRPTERM)
1438                rollback_token();
1439}
1440
1441static node *chain_loop(node *nn)
1442{
1443        node *n, *n2, *save_brk, *save_cont;
1444
1445        save_brk = break_ptr;
1446        save_cont = continue_ptr;
1447
1448        n = chain_node(OC_BR | Vx);
1449        continue_ptr = new_node(OC_EXEC);
1450        break_ptr = new_node(OC_EXEC);
1451        chain_group();
1452        n2 = chain_node(OC_EXEC | Vx);
1453        n2->l.n = nn;
1454        n2->a.n = n;
1455        continue_ptr->a.n = n2;
1456        break_ptr->a.n = n->r.n = seq->last;
1457
1458        continue_ptr = save_cont;
1459        break_ptr = save_brk;
1460
1461        return n;
1462}
1463
/* parse group and attach it to chain.
 * A group is one of:
 *  - a TC_GRPSTART ... TC_GRPTERM block of nested groups,
 *  - an expression statement,
 *  - a statement keyword (if, while, do, for, print, printf, break,
 *    continue, delete, next, nextfile, return, exit).
 * Nodes are appended to the current sequence 'seq' through chain_node().
 * Leading newlines are skipped.
 */
static void chain_group(void)
{
        uint32_t c;
        node *n, *n2, *n3;

        do {
                c = next_token(TC_GRPSEQ);
        } while (c & TC_NEWLINE);

        if (c & TC_GRPSTART) {
                debug_printf_parse("%s: TC_GRPSTART\n", __func__);
                /* nested groups until the closing TC_GRPTERM */
                while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
                        debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
                        if (t_tclass & TC_NEWLINE)
                                continue;
                        /* the token starts a nested group: let the recursive
                         * call read it again */
                        rollback_token();
                        chain_group();
                }
                debug_printf_parse("%s: TC_GRPTERM\n", __func__);
        } else if (c & (TC_OPSEQ | TC_OPTERM)) {
                debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
                /* expression statement: the token belongs to the expression */
                rollback_token();
                chain_expr(OC_EXEC | Vx);
        } else {
                /* TC_STATEMNT */
                debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
                switch (t_info & OPCLSMASK) {
                case ST_IF:
                        debug_printf_parse("%s: ST_IF\n", __func__);
                        n = chain_node(OC_BR | Vx);
                        n->l.n = condition();
                        chain_group();
                        /* n2 ends the "then" part; n->r.n is the node after it */
                        n2 = chain_node(OC_EXEC);
                        n->r.n = seq->last;
                        if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
                                chain_group();
                                /* from the end of "then", continue after the "else" part */
                                n2->a.n = seq->last;
                        } else {
                                rollback_token();
                        }
                        break;

                case ST_WHILE:
                        debug_printf_parse("%s: ST_WHILE\n", __func__);
                        n2 = condition();
                        n = chain_loop(NULL);
                        n->l.n = n2;
                        break;

                case ST_DO:
                        debug_printf_parse("%s: ST_DO\n", __func__);
                        n2 = chain_node(OC_EXEC);
                        n = chain_loop(NULL);
                        /* n2 links to the node after the loop head, so the first
                         * pass does not go through the head (whose condition is
                         * only parsed after the body, below) */
                        n2->a.n = n->a.n;
                        next_token(TC_WHILE);
                        n->l.n = condition();
                        break;

                case ST_FOR:
                        debug_printf_parse("%s: ST_FOR\n", __func__);
                        next_token(TC_SEQSTART);
                        /* which terminator ended the first expression tells
                         * "for (x in arr)" from "for (init; cond; step)" */
                        n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
                        if (t_tclass & TC_SEQTERM) {    /* for-in */
                                if (!n2 || (n2->info & OPCLSMASK) != OC_IN)
                                        syntax_error(EMSG_UNEXP_TOKEN);
                                n = chain_node(OC_WALKINIT | VV);
                                n->l.n = n2->l.n;
                                n->r.n = n2->r.n;
                                n = chain_loop(NULL);
                                /* the loop head from chain_loop is turned into the
                                 * OC_WALKNEXT step */
                                n->info = OC_WALKNEXT | Vx;
                                n->l.n = n2->l.n;
                        } else {                        /* for (;;) */
                                n = chain_node(OC_EXEC | Vx);
                                n->l.n = n2;
                                n2 = parse_expr(TC_SEMICOL);
                                n3 = parse_expr(TC_SEQTERM);
                                n = chain_loop(n3);
                                n->l.n = n2;
                                /* no condition given: the head is a plain OC_EXEC */
                                if (!n2)
                                        n->info = OC_EXEC;
                        }
                        break;

                case OC_PRINT:
                case OC_PRINTF:
                        debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
                        n = chain_node(t_info);
                        n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
                        if (t_tclass & TC_OUTRDR) {
                                /* output redirection: its t_info bits are merged into
                                 * the node and the target expression goes to r.n */
                                n->info |= t_info;
                                n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
                        }
                        if (t_tclass & TC_GRPTERM)
                                rollback_token();
                        break;

                case OC_BREAK:
                        debug_printf_parse("%s: OC_BREAK\n", __func__);
                        n = chain_node(OC_EXEC);
                        n->a.n = break_ptr;
                        chain_expr(t_info);
                        break;

                case OC_CONTINUE:
                        debug_printf_parse("%s: OC_CONTINUE\n", __func__);
                        n = chain_node(OC_EXEC);
                        n->a.n = continue_ptr;
                        chain_expr(t_info);
                        break;

                /* delete, next, nextfile, return, exit */
                default:
                        debug_printf_parse("%s: default\n", __func__);
                        chain_expr(t_info);
                }
        }
}
1582
1583static void parse_program(char *p)
1584{
1585        uint32_t tclass;
1586        node *cn;
1587        func *f;
1588        var *v;
1589
1590        g_pos = p;
1591        t_lineno = 1;
1592        while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1593                        TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1594
1595                if (tclass & TC_OPTERM) {
1596                        debug_printf_parse("%s: TC_OPTERM\n", __func__);
1597                        continue;
1598                }
1599
1600                seq = &mainseq;
1601                if (tclass & TC_BEGIN) {
1602                        debug_printf_parse("%s: TC_BEGIN\n", __func__);
1603                        seq = &beginseq;
1604                        chain_group();
1605                } else if (tclass & TC_END) {
1606                        debug_printf_parse("%s: TC_END\n", __func__);
1607                        seq = &endseq;
1608                        chain_group();
1609                } else if (tclass & TC_FUNCDECL) {
1610                        debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1611                        next_token(TC_FUNCTION);
1612                        g_pos++;
1613                        f = newfunc(t_string);
1614                        f->body.first = NULL;
1615                        f->nargs = 0;
1616                        /* Match func arg list: a comma sep list of >= 0 args, and a close paren */
1617                        while (next_token(TC_VARIABLE | TC_SEQTERM | TC_COMMA)) {
1618                                /* Either an empty arg list, or trailing comma from prev iter
1619                                 * must be followed by an arg */
1620                                if (f->nargs == 0 && t_tclass == TC_SEQTERM)
1621                                        break;
1622
1623                                /* TC_SEQSTART/TC_COMMA must be followed by TC_VARIABLE */
1624                                if (t_tclass != TC_VARIABLE)
1625                                        syntax_error(EMSG_UNEXP_TOKEN);
1626
1627                                v = findvar(ahash, t_string);
1628                                v->x.aidx = f->nargs++;
1629
1630                                /* Arg followed either by end of arg list or 1 comma */
1631                                if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1632                                        break;
1633                                if (t_tclass != TC_COMMA)
1634                                        syntax_error(EMSG_UNEXP_TOKEN);
1635                        }
1636                        seq = &f->body;
1637                        chain_group();
1638                        clear_array(ahash);
1639                } else if (tclass & TC_OPSEQ) {
1640                        debug_printf_parse("%s: TC_OPSEQ\n", __func__);
1641                        rollback_token();
1642                        cn = chain_node(OC_TEST);
1643                        cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1644                        if (t_tclass & TC_GRPSTART) {
1645                                debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1646                                rollback_token();
1647                                chain_group();
1648                        } else {
1649                                debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1650                                chain_node(OC_PRINT);
1651                        }
1652                        cn->r.n = mainseq.last;
1653                } else /* if (tclass & TC_GRPSTART) */ {
1654                        debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1655                        rollback_token();
1656                        chain_group();
1657                }
1658        }
1659        debug_printf_parse("%s: TC_EOF\n", __func__);
1660}
1661
1662
1663/* -------- program execution part -------- */
1664
1665static node *mk_splitter(const char *s, tsplitter *spl)
1666{
1667        regex_t *re, *ire;
1668        node *n;
1669
1670        re = &spl->re[0];
1671        ire = &spl->re[1];
1672        n = &spl->n;
1673        if ((n->info & OPCLSMASK) == OC_REGEXP) {
1674                regfree(re);
1675                regfree(ire); // TODO: nuke ire, use re+1?
1676        }
1677        if (s[0] && s[1]) { /* strlen(s) > 1 */
1678                mk_re_node(s, n, re);
1679        } else {
1680                n->info = (uint32_t) s[0];
1681        }
1682
1683        return n;
1684}
1685
1686/* use node as a regular expression. Supplied with node ptr and regex_t
1687 * storage space. Return ptr to regex (if result points to preg, it should
1688 * be later regfree'd manually
1689 */
1690static regex_t *as_regex(node *op, regex_t *preg)
1691{
1692        int cflags;
1693        var *v;
1694        const char *s;
1695
1696        if ((op->info & OPCLSMASK) == OC_REGEXP) {
1697                return icase ? op->r.ire : op->l.re;
1698        }
1699        v = nvalloc(1);
1700        s = getvar_s(evaluate(op, v));
1701
1702        cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1703        /* Testcase where REG_EXTENDED fails (unpaired '{'):
1704         * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1705         * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1706         * (maybe gsub is not supposed to use REG_EXTENDED?).
1707         */
1708        if (regcomp(preg, s, cflags)) {
1709                cflags &= ~REG_EXTENDED;
1710                xregcomp(preg, s, cflags);
1711        }
1712        nvfree(v);
1713        return preg;
1714}
1715
/* Gradually growing buffer.
 * Returns b unchanged while n fits strictly below *size. Otherwise
 * (including n == *size, or b == NULL) the buffer is reallocated to
 * n + n/2 + 80 bytes and *size is updated, so there is always at least
 * one byte allocated past index n - 1.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1728
1729/* resize field storage space */
1730static void fsrealloc(int size)
1731{
1732        int i;
1733
1734        if (size >= maxfields) {
1735                i = maxfields;
1736                maxfields = size + 16;
1737                Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1738                for (; i < maxfields; i++) {
1739                        Fields[i].type = VF_SPECIAL;
1740                        Fields[i].string = NULL;
1741                }
1742        }
1743        /* if size < nfields, clear extra field variables */
1744        for (i = size; i < nfields; i++) {
1745                clrvar(Fields + i);
1746        }
1747        nfields = size;
1748}
1749
1750static int awk_split(const char *s, node *spl, char **slist)
1751{
1752        int l, n;
1753        char c[4];
1754        char *s1;
1755        regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1756
1757        /* in worst case, each char would be a separate field */
1758        *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1759        strcpy(s1, s);
1760
1761        c[0] = c[1] = (char)spl->info;
1762        c[2] = c[3] = '\0';
1763        if (*getvar_s(intvar[RS]) == '\0')
1764                c[2] = '\n';
1765
1766        n = 0;
1767        if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1768                if (!*s)
1769                        return n; /* "": zero fields */
1770                n++; /* at least one field will be there */
1771                do {
1772                        l = strcspn(s, c+2); /* len till next NUL or \n */
1773                        if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1774                         && pmatch[0].rm_so <= l
1775                        ) {
1776                                l = pmatch[0].rm_so;
1777                                if (pmatch[0].rm_eo == 0) {
1778                                        l++;
1779                                        pmatch[0].rm_eo++;
1780                                }
1781                                n++; /* we saw yet another delimiter */
1782                        } else {
1783                                pmatch[0].rm_eo = l;
1784                                if (s[l])
1785                                        pmatch[0].rm_eo++;
1786                        }
1787                        memcpy(s1, s, l);
1788                        /* make sure we remove *all* of the separator chars */
1789                        do {
1790                                s1[l] = '\0';
1791                        } while (++l < pmatch[0].rm_eo);
1792                        nextword(&s1);
1793                        s += pmatch[0].rm_eo;
1794                } while (*s);
1795                return n;
1796        }
1797        if (c[0] == '\0') {  /* null split */
1798                while (*s) {
1799                        *s1++ = *s++;
1800                        *s1++ = '\0';
1801                        n++;
1802                }
1803                return n;
1804        }
1805        if (c[0] != ' ') {  /* single-character split */
1806                if (icase) {
1807                        c[0] = toupper(c[0]);
1808                        c[1] = tolower(c[1]);
1809                }
1810                if (*s1)
1811                        n++;
1812                while ((s1 = strpbrk(s1, c)) != NULL) {
1813                        *s1++ = '\0';
1814                        n++;
1815                }
1816                return n;
1817        }
1818        /* space split */
1819        while (*s) {
1820                s = skip_whitespace(s);
1821                if (!*s)
1822                        break;
1823                n++;
1824                while (*s && !isspace(*s))
1825                        *s1++ = *s++;
1826                *s1++ = '\0';
1827        }
1828        return n;
1829}
1830
1831static void split_f0(void)
1832{
1833/* static char *fstrings; */
1834#define fstrings (G.split_f0__fstrings)
1835
1836        int i, n;
1837        char *s;
1838
1839        if (is_f0_split)
1840                return;
1841
1842        is_f0_split = TRUE;
1843        free(fstrings);
1844        fsrealloc(0);
1845        n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1846        fsrealloc(n);
1847        s = fstrings;
1848        for (i = 0; i < n; i++) {
1849                Fields[i].string = nextword(&s);
1850                Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1851        }
1852
1853        /* set NF manually to avoid side effects */
1854        clrvar(intvar[NF]);
1855        intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1856        intvar[NF]->number = nfields;
1857#undef fstrings
1858}
1859
1860/* perform additional actions when some internal variables changed */
1861static void handle_special(var *v)
1862{
1863        int n;
1864        char *b;
1865        const char *sep, *s;
1866        int sl, l, len, i, bsize;
1867
1868        if (!(v->type & VF_SPECIAL))
1869                return;
1870
1871        if (v == intvar[NF]) {
1872                n = (int)getvar_i(v);
1873                if (n < 0)
1874                        syntax_error("NF set to negative value");
1875                fsrealloc(n);
1876
1877                /* recalculate $0 */
1878                sep = getvar_s(intvar[OFS]);
1879                sl = strlen(sep);
1880                b = NULL;
1881                len = 0;
1882                for (i = 0; i < n; i++) {
1883                        s = getvar_s(&Fields[i]);
1884                        l = strlen(s);
1885                        if (b) {
1886                                memcpy(b+len, sep, sl);
1887                                len += sl;
1888                        }
1889                        b = qrealloc(b, len+l+sl, &bsize);
1890                        memcpy(b+len, s, l);
1891                        len += l;
1892                }
1893                if (b)
1894                        b[len] = '\0';
1895                setvar_p(intvar[F0], b);
1896                is_f0_split = TRUE;
1897
1898        } else if (v == intvar[F0]) {
1899                is_f0_split = FALSE;
1900
1901        } else if (v == intvar[FS]) {
1902                /*
1903                 * The POSIX-2008 standard says that changing FS should have no effect on the
1904                 * current input line, but only on the next one. The language is:
1905                 *
1906                 * > Before the first reference to a field in the record is evaluated, the record
1907                 * > shall be split into fields, according to the rules in Regular Expressions,
1908                 * > using the value of FS that was current at the time the record was read.
1909                 *
1910                 * So, split up current line before assignment to FS:
1911                 */
1912                split_f0();
1913
1914                mk_splitter(getvar_s(v), &fsplitter);
1915        } else if (v == intvar[RS]) {
1916                mk_splitter(getvar_s(v), &rsplitter);
1917        } else if (v == intvar[IGNORECASE]) {
1918                icase = istrue(v);
1919        } else {                                /* $n */
1920                n = getvar_i(intvar[NF]);
1921                setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1922                /* right here v is invalid. Just to note... */
1923        }
1924}
1925
1926/* step through func/builtin/etc arguments */
1927static node *nextarg(node **pn)
1928{
1929        node *n;
1930
1931        n = *pn;
1932        if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1933                *pn = n->r.n;
1934                n = n->l.n;
1935        } else {
1936                *pn = NULL;
1937        }
1938        return n;
1939}
1940
1941static void hashwalk_init(var *v, xhash *array)
1942{
1943        hash_item *hi;
1944        unsigned i;
1945        walker_list *w;
1946        walker_list *prev_walker;
1947
1948        if (v->type & VF_WALK) {
1949                prev_walker = v->x.walker;
1950        } else {
1951                v->type |= VF_WALK;
1952                prev_walker = NULL;
1953        }
1954        debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1955
1956        w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1957        debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1958        w->cur = w->end = w->wbuf;
1959        w->prev = prev_walker;
1960        for (i = 0; i < array->csize; i++) {
1961                hi = array->items[i];
1962                while (hi) {
1963                        strcpy(w->end, hi->name);
1964                        nextword(&w->end);
1965                        hi = hi->next;
1966                }
1967        }
1968}
1969
1970static int hashwalk_next(var *v)
1971{
1972        walker_list *w = v->x.walker;
1973
1974        if (w->cur >= w->end) {
1975                walker_list *prev_walker = w->prev;
1976
1977                debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1978                free(w);
1979                v->x.walker = prev_walker;
1980                return FALSE;
1981        }
1982
1983        setvar_s(v, nextword(&w->cur));
1984        return TRUE;
1985}
1986
1987/* evaluate node, return 1 when result is true, 0 otherwise */
1988static int ptest(node *pattern)
1989{
1990        /* ptest__v is "static": to save stack space? */
1991        return istrue(evaluate(pattern, &G.ptest__v));
1992}
1993
1994/* read next record from stream rsm into a variable v */
1995static int awk_getline(rstream *rsm, var *v)
1996{
1997        char *b;
1998        regmatch_t pmatch[2];
1999        int size, a, p, pp = 0;
2000        int fd, so, eo, r, rp;
2001        char c, *m, *s;
2002
2003        debug_printf_eval("entered %s()\n", __func__);
2004
2005        /* we're using our own buffer since we need access to accumulating
2006         * characters
2007         */
2008        fd = fileno(rsm->F);
2009        m = rsm->buffer;
2010        a = rsm->adv;
2011        p = rsm->pos;
2012        size = rsm->size;
2013        c = (char) rsplitter.n.info;
2014        rp = 0;
2015
2016        if (!m)
2017                m = qrealloc(m, 256, &size);
2018
2019        do {
2020                b = m + a;
2021                so = eo = p;
2022                r = 1;
2023                if (p > 0) {
2024                        if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
2025                                if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
2026                                                        b, 1, pmatch, 0) == 0) {
2027                                        so = pmatch[0].rm_so;
2028                                        eo = pmatch[0].rm_eo;
2029                                        if (b[eo] != '\0')
2030                                                break;
2031                                }
2032                        } else if (c != '\0') {
2033                                s = strchr(b+pp, c);
2034                                if (!s)
2035                                        s = memchr(b+pp, '\0', p - pp);
2036                                if (s) {
2037                                        so = eo = s-b;
2038                                        eo++;
2039                                        break;
2040                                }
2041                        } else {
2042                                while (b[rp] == '\n')
2043                                        rp++;
2044                                s = strstr(b+rp, "\n\n");
2045                                if (s) {
2046                                        so = eo = s-b;
2047                                        while (b[eo] == '\n')
2048                                                eo++;
2049                                        if (b[eo] != '\0')
2050                                                break;
2051                                }
2052                        }
2053                }
2054
2055                if (a > 0) {
2056                        memmove(m, m+a, p+1);
2057                        b = m;
2058                        a = 0;
2059                }
2060
2061                m = qrealloc(m, a+p+128, &size);
2062                b = m + a;
2063                pp = p;
2064                p += safe_read(fd, b+p, size-p-1);
2065                if (p < pp) {
2066                        p = 0;
2067                        r = 0;
2068                        setvar_i(intvar[ERRNO], errno);
2069                }
2070                b[p] = '\0';
2071
2072        } while (p > pp);
2073
2074        if (p == 0) {
2075                r--;
2076        } else {
2077                c = b[so]; b[so] = '\0';
2078                setvar_s(v, b+rp);
2079                v->type |= VF_USER;
2080                b[so] = c;
2081                c = b[eo]; b[eo] = '\0';
2082                setvar_s(intvar[RT], b+so);
2083                b[eo] = c;
2084        }
2085
2086        rsm->buffer = m;
2087        rsm->adv = a + eo;
2088        rsm->pos = p - eo;
2089        rsm->size = size;
2090
2091        debug_printf_eval("returning from %s(): %d\n", __func__, r);
2092
2093        return r;
2094}
2095
2096static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2097{
2098        int r = 0;
2099        char c;
2100        const char *s = format;
2101
2102        if (int_as_int && n == (long long)n) {
2103                r = snprintf(b, size, "%lld", (long long)n);
2104        } else {
2105                do { c = *s; } while (c && *++s);
2106                if (strchr("diouxX", c)) {
2107                        r = snprintf(b, size, format, (int)n);
2108                } else if (strchr("eEfgG", c)) {
2109                        r = snprintf(b, size, format, n);
2110                } else {
2111                        syntax_error(EMSG_INV_FMT);
2112                }
2113        }
2114        return r;
2115}
2116
2117/* formatted output into an allocated buffer, return ptr to buffer */
2118static char *awk_printf(node *n)
2119{
2120        char *b = NULL;
2121        char *fmt, *s, *f;
2122        const char *s1;
2123        int i, j, incr, bsize;
2124        char c, c1;
2125        var *v, *arg;
2126
2127        v = nvalloc(1);
2128        fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
2129
2130        i = 0;
2131        while (*f) {
2132                s = f;
2133                while (*f && (*f != '%' || *++f == '%'))
2134                        f++;
2135                while (*f && !isalpha(*f)) {
2136                        if (*f == '*')
2137                                syntax_error("%*x formats are not supported");
2138                        f++;
2139                }
2140
2141                incr = (f - s) + MAXVARFMT;
2142                b = qrealloc(b, incr + i, &bsize);
2143                c = *f;
2144                if (c != '\0')
2145                        f++;
2146                c1 = *f;
2147                *f = '\0';
2148                arg = evaluate(nextarg(&n), v);
2149
2150                j = i;
2151                if (c == 'c' || !c) {
2152                        i += sprintf(b+i, s, is_numeric(arg) ?
2153                                        (char)getvar_i(arg) : *getvar_s(arg));
2154                } else if (c == 's') {
2155                        s1 = getvar_s(arg);
2156                        b = qrealloc(b, incr+i+strlen(s1), &bsize);
2157                        i += sprintf(b+i, s, s1);
2158                } else {
2159                        i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2160                }
2161                *f = c1;
2162
2163                /* if there was an error while sprintf, return value is negative */
2164                if (i < j)
2165                        i = j;
2166        }
2167
2168        free(fmt);
2169        nvfree(v);
2170        b = xrealloc(b, i + 1);
2171        b[i] = '\0';
2172        return b;
2173}
2174
2175/* Common substitution routine.
2176 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2177 * store result into (dest), return number of substitutions.
2178 * If nm = 0, replace all matches.
2179 * If src or dst is NULL, use $0.
2180 * If subexp != 0, enable subexpression matching (\1-\9).
2181 */
2182static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2183{
2184        char *resbuf;
2185        const char *sp;
2186        int match_no, residx, replen, resbufsize;
2187        int regexec_flags;
2188        regmatch_t pmatch[10];
2189        regex_t sreg, *regex;
2190
2191        resbuf = NULL;
2192        residx = 0;
2193        match_no = 0;
2194        regexec_flags = 0;
2195        regex = as_regex(rn, &sreg);
2196        sp = getvar_s(src ? src : intvar[F0]);
2197        replen = strlen(repl);
2198        while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2199                int so = pmatch[0].rm_so;
2200                int eo = pmatch[0].rm_eo;
2201
2202                //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2203                resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2204                memcpy(resbuf + residx, sp, eo);
2205                residx += eo;
2206                if (++match_no >= nm) {
2207                        const char *s;
2208                        int nbs;
2209
2210                        /* replace */
2211                        residx -= (eo - so);
2212                        nbs = 0;
2213                        for (s = repl; *s; s++) {
2214                                char c = resbuf[residx++] = *s;
2215                                if (c == '\\') {
2216                                        nbs++;
2217                                        continue;
2218                                }
2219                                if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2220                                        int j;
2221                                        residx -= ((nbs + 3) >> 1);
2222                                        j = 0;
2223                                        if (c != '&') {
2224                                                j = c - '0';
2225                                                nbs++;
2226                                        }
2227                                        if (nbs % 2) {
2228                                                resbuf[residx++] = c;
2229                                        } else {
2230                                                int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2231                                                resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2232                                                memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2233                                                residx += n;
2234                                        }
2235                                }
2236                                nbs = 0;
2237                        }
2238                }
2239
2240                regexec_flags = REG_NOTBOL;
2241                sp += eo;
2242                if (match_no == nm)
2243                        break;
2244                if (eo == so) {
2245                        /* Empty match (e.g. "b*" will match anywhere).
2246                         * Advance by one char. */
2247//BUG (bug 1333):
2248//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2249//... and will erroneously match "b" even though it is NOT at the word start.
2250//we need REG_NOTBOW but it does not exist...
2251//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2252//it should be able to do it correctly.
2253                        /* Subtle: this is safe only because
2254                         * qrealloc allocated at least one extra byte */
2255                        resbuf[residx] = *sp;
2256                        if (*sp == '\0')
2257                                goto ret;
2258                        sp++;
2259                        residx++;
2260                }
2261        }
2262
2263        resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2264        strcpy(resbuf + residx, sp);
2265 ret:
2266        //bb_error_msg("end sp:'%s'%p", sp,sp);
2267        setvar_p(dest ? dest : intvar[F0], resbuf);
2268        if (regex == &sreg)
2269                regfree(regex);
2270        return match_no;
2271}
2272
2273static NOINLINE int do_mktime(const char *ds)
2274{
2275        struct tm then;
2276        int count;
2277
2278        /*memset(&then, 0, sizeof(then)); - not needed */
2279        then.tm_isdst = -1; /* default is unknown */
2280
2281        /* manpage of mktime says these fields are ints,
2282         * so we can sscanf stuff directly into them */
2283        count = sscanf(ds, "%u %u %u %u %u %u %d",
2284                &then.tm_year, &then.tm_mon, &then.tm_mday,
2285                &then.tm_hour, &then.tm_min, &then.tm_sec,
2286                &then.tm_isdst);
2287
2288        if (count < 6
2289         || (unsigned)then.tm_mon < 1
2290         || (unsigned)then.tm_year < 1900
2291        ) {
2292                return -1;
2293        }
2294
2295        then.tm_mon -= 1;
2296        then.tm_year -= 1900;
2297
2298        return mktime(&then);
2299}
2300
2301static NOINLINE var *exec_builtin(node *op, var *res)
2302{
2303#define tspl (G.exec_builtin__tspl)
2304
2305        var *tv;
2306        node *an[4];
2307        var *av[4];
2308        const char *as[4];
2309        regmatch_t pmatch[2];
2310        regex_t sreg, *re;
2311        node *spl;
2312        uint32_t isr, info;
2313        int nargs;
2314        time_t tt;
2315        int i, l, ll, n;
2316
2317        tv = nvalloc(4);
2318        isr = info = op->info;
2319        op = op->l.n;
2320
2321        av[2] = av[3] = NULL;
2322        for (i = 0; i < 4 && op; i++) {
2323                an[i] = nextarg(&op);
2324                if (isr & 0x09000000)
2325                        av[i] = evaluate(an[i], &tv[i]);
2326                if (isr & 0x08000000)
2327                        as[i] = getvar_s(av[i]);
2328                isr >>= 1;
2329        }
2330
2331        nargs = i;
2332        if ((uint32_t)nargs < (info >> 30))
2333                syntax_error(EMSG_TOO_FEW_ARGS);
2334
2335        info &= OPNMASK;
2336        switch (info) {
2337
2338        case B_a2:
2339                if (ENABLE_FEATURE_AWK_LIBM)
2340                        setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2341                else
2342                        syntax_error(EMSG_NO_MATH);
2343                break;
2344
2345        case B_sp: {
2346                char *s, *s1;
2347
2348                if (nargs > 2) {
2349                        spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2350                                an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2351                } else {
2352                        spl = &fsplitter.n;
2353                }
2354
2355                n = awk_split(as[0], spl, &s);
2356                s1 = s;
2357                clear_array(iamarray(av[1]));
2358                for (i = 1; i <= n; i++)
2359                        setari_u(av[1], i, nextword(&s));
2360                free(s1);
2361                setvar_i(res, n);
2362                break;
2363        }
2364
2365        case B_ss: {
2366                char *s;
2367
2368                l = strlen(as[0]);
2369                i = getvar_i(av[1]) - 1;
2370                if (i > l)
2371                        i = l;
2372                if (i < 0)
2373                        i = 0;
2374                n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2375                if (n < 0)
2376                        n = 0;
2377                s = xstrndup(as[0]+i, n);
2378                setvar_p(res, s);
2379                break;
2380        }
2381
2382        /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2383         * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2384        case B_an:
2385                setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2386                break;
2387
2388        case B_co:
2389                setvar_i(res, ~getvar_i_int(av[0]));
2390                break;
2391
2392        case B_ls:
2393                setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2394                break;
2395
2396        case B_or:
2397                setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2398                break;
2399
2400        case B_rs:
2401                setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2402                break;
2403
2404        case B_xo:
2405                setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2406                break;
2407
2408        case B_lo:
2409        case B_up: {
2410                char *s, *s1;
2411                s1 = s = xstrdup(as[0]);
2412                while (*s1) {
2413                        //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2414                        if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2415                                *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2416                        s1++;
2417                }
2418                setvar_p(res, s);
2419                break;
2420        }
2421
2422        case B_ix:
2423                n = 0;
2424                ll = strlen(as[1]);
2425                l = strlen(as[0]) - ll;
2426                if (ll > 0 && l >= 0) {
2427                        if (!icase) {
2428                                char *s = strstr(as[0], as[1]);
2429                                if (s)
2430                                        n = (s - as[0]) + 1;
2431                        } else {
2432                                /* this piece of code is terribly slow and
2433                                 * really should be rewritten
2434                                 */
2435                                for (i = 0; i <= l; i++) {
2436                                        if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2437                                                n = i+1;
2438                                                break;
2439                                        }
2440                                }
2441                        }
2442                }
2443                setvar_i(res, n);
2444                break;
2445
2446        case B_ti:
2447                if (nargs > 1)
2448                        tt = getvar_i(av[1]);
2449                else
2450                        time(&tt);
2451                //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2452                i = strftime(g_buf, MAXVARFMT,
2453                        ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2454                        localtime(&tt));
2455                g_buf[i] = '\0';
2456                setvar_s(res, g_buf);
2457                break;
2458
2459        case B_mt:
2460                setvar_i(res, do_mktime(as[0]));
2461                break;
2462
2463        case B_ma:
2464                re = as_regex(an[1], &sreg);
2465                n = regexec(re, as[0], 1, pmatch, 0);
2466                if (n == 0) {
2467                        pmatch[0].rm_so++;
2468                        pmatch[0].rm_eo++;
2469                } else {
2470                        pmatch[0].rm_so = 0;
2471                        pmatch[0].rm_eo = -1;
2472                }
2473                setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2474                setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2475                setvar_i(res, pmatch[0].rm_so);
2476                if (re == &sreg)
2477                        regfree(re);
2478                break;
2479
2480        case B_ge:
2481                awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2482                break;
2483
2484        case B_gs:
2485                setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2486                break;
2487
2488        case B_su:
2489                setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2490                break;
2491        }
2492
2493        nvfree(tv);
2494        return res;
2495#undef tspl
2496}
2497
2498/*
2499 * Evaluate node - the heart of the program. Supplied with subtree
2500 * and place where to store result. returns ptr to result.
2501 */
2502#define XC(n) ((n) >> 8)
2503
2504static var *evaluate(node *op, var *res)
2505{
2506/* This procedure is recursive so we should count every byte */
2507#define fnargs (G.evaluate__fnargs)
2508/* seed is initialized to 1 */
2509#define seed   (G.evaluate__seed)
2510#define sreg   (G.evaluate__sreg)
2511
2512        var *v1;
2513
2514        if (!op)
2515                return setvar_s(res, NULL);
2516
2517        debug_printf_eval("entered %s()\n", __func__);
2518
2519        v1 = nvalloc(2);
2520
2521        while (op) {
2522                struct {
2523                        var *v;
2524                        const char *s;
2525                } L = L; /* for compiler */
2526                struct {
2527                        var *v;
2528                        const char *s;
2529                } R = R;
2530                double L_d = L_d;
2531                uint32_t opinfo;
2532                int opn;
2533                node *op1;
2534
2535                opinfo = op->info;
2536                opn = (opinfo & OPNMASK);
2537                g_lineno = op->lineno;
2538                op1 = op->l.n;
2539                debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2540
2541                /* "delete" is special:
2542                 * "delete array[var--]" must evaluate index expr only once,
2543                 * must not evaluate it in "execute inevitable things" part.
2544                 */
2545                if (XC(opinfo & OPCLSMASK) == XC(OC_DELETE)) {
2546                        uint32_t info = op1->info & OPCLSMASK;
2547                        var *v;
2548
2549                        debug_printf_eval("DELETE\n");
2550                        if (info == OC_VAR) {
2551                                v = op1->l.v;
2552                        } else if (info == OC_FNARG) {
2553                                v = &fnargs[op1->l.aidx];
2554                        } else {
2555                                syntax_error(EMSG_NOT_ARRAY);
2556                        }
2557                        if (op1->r.n) { /* array ref? */
2558                                const char *s;
2559                                s = getvar_s(evaluate(op1->r.n, v1));
2560                                hash_remove(iamarray(v), s);
2561                        } else {
2562                                clear_array(iamarray(v));
2563                        }
2564                        goto next;
2565                }
2566
2567                /* execute inevitable things */
2568                if (opinfo & OF_RES1)
2569                        L.v = evaluate(op1, v1);
2570                if (opinfo & OF_RES2)
2571                        R.v = evaluate(op->r.n, v1+1);
2572                if (opinfo & OF_STR1) {
2573                        L.s = getvar_s(L.v);
2574                        debug_printf_eval("L.s:'%s'\n", L.s);
2575                }
2576                if (opinfo & OF_STR2) {
2577                        R.s = getvar_s(R.v);
2578                        debug_printf_eval("R.s:'%s'\n", R.s);
2579                }
2580                if (opinfo & OF_NUM1) {
2581                        L_d = getvar_i(L.v);
2582                        debug_printf_eval("L_d:%f\n", L_d);
2583                }
2584
2585                debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2586                switch (XC(opinfo & OPCLSMASK)) {
2587
2588                /* -- iterative node type -- */
2589
2590                /* test pattern */
2591                case XC( OC_TEST ):
2592                        if ((op1->info & OPCLSMASK) == OC_COMMA) {
2593                                /* it's range pattern */
2594                                if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2595                                        op->info |= OF_CHECKED;
2596                                        if (ptest(op1->r.n))
2597                                                op->info &= ~OF_CHECKED;
2598                                        op = op->a.n;
2599                                } else {
2600                                        op = op->r.n;
2601                                }
2602                        } else {
2603                                op = ptest(op1) ? op->a.n : op->r.n;
2604                        }
2605                        break;
2606
2607                /* just evaluate an expression, also used as unconditional jump */
2608                case XC( OC_EXEC ):
2609                        break;
2610
2611                /* branch, used in if-else and various loops */
2612                case XC( OC_BR ):
2613                        op = istrue(L.v) ? op->a.n : op->r.n;
2614                        break;
2615
2616                /* initialize for-in loop */
2617                case XC( OC_WALKINIT ):
2618                        hashwalk_init(L.v, iamarray(R.v));
2619                        break;
2620
2621                /* get next array item */
2622                case XC( OC_WALKNEXT ):
2623                        op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2624                        break;
2625
2626                case XC( OC_PRINT ):
2627                case XC( OC_PRINTF ): {
2628                        FILE *F = stdout;
2629
2630                        if (op->r.n) {
2631                                rstream *rsm = newfile(R.s);
2632                                if (!rsm->F) {
2633                                        if (opn == '|') {
2634                                                rsm->F = popen(R.s, "w");
2635                                                if (rsm->F == NULL)
2636                                                        bb_perror_msg_and_die("popen");
2637                                                rsm->is_pipe = 1;
2638                                        } else {
2639                                                rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2640                                        }
2641                                }
2642                                F = rsm->F;
2643                        }
2644
2645                        if ((opinfo & OPCLSMASK) == OC_PRINT) {
2646                                if (!op1) {
2647                                        fputs(getvar_s(intvar[F0]), F);
2648                                } else {
2649                                        while (op1) {
2650                                                var *v = evaluate(nextarg(&op1), v1);
2651                                                if (v->type & VF_NUMBER) {
2652                                                        fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2653                                                                        getvar_i(v), TRUE);
2654                                                        fputs(g_buf, F);
2655                                                } else {
2656                                                        fputs(getvar_s(v), F);
2657                                                }
2658
2659                                                if (op1)
2660                                                        fputs(getvar_s(intvar[OFS]), F);
2661                                        }
2662                                }
2663                                fputs(getvar_s(intvar[ORS]), F);
2664
2665                        } else {        /* OC_PRINTF */
2666                                char *s = awk_printf(op1);
2667                                fputs(s, F);
2668                                free(s);
2669                        }
2670                        fflush(F);
2671                        break;
2672                }
2673
2674                /* case XC( OC_DELETE ): - moved to happen before arg evaluation */
2675
2676                case XC( OC_NEWSOURCE ):
2677                        g_progname = op->l.new_progname;
2678                        break;
2679
2680                case XC( OC_RETURN ):
2681                        copyvar(res, L.v);
2682                        break;
2683
2684                case XC( OC_NEXTFILE ):
2685                        nextfile = TRUE;
2686                case XC( OC_NEXT ):
2687                        nextrec = TRUE;
2688                case XC( OC_DONE ):
2689                        clrvar(res);
2690                        break;
2691
2692                case XC( OC_EXIT ):
2693                        awk_exit(L_d);
2694
2695                /* -- recursive node type -- */
2696
2697                case XC( OC_VAR ):
2698                        debug_printf_eval("VAR\n");
2699                        L.v = op->l.v;
2700                        if (L.v == intvar[NF])
2701                                split_f0();
2702                        goto v_cont;
2703
2704                case XC( OC_FNARG ):
2705                        debug_printf_eval("FNARG[%d]\n", op->l.aidx);
2706                        L.v = &fnargs[op->l.aidx];
2707 v_cont:
2708                        res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2709                        break;
2710
2711                case XC( OC_IN ):
2712                        setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2713                        break;
2714
2715                case XC( OC_REGEXP ):
2716                        op1 = op;
2717                        L.s = getvar_s(intvar[F0]);
2718                        goto re_cont;
2719
2720                case XC( OC_MATCH ):
2721                        op1 = op->r.n;
2722 re_cont:
2723                        {
2724                                regex_t *re = as_regex(op1, &sreg);
2725                                int i = regexec(re, L.s, 0, NULL, 0);
2726                                if (re == &sreg)
2727                                        regfree(re);
2728                                setvar_i(res, (i == 0) ^ (opn == '!'));
2729                        }
2730                        break;
2731
2732                case XC( OC_MOVE ):
2733                        debug_printf_eval("MOVE\n");
2734                        /* if source is a temporary string, jusk relink it to dest */
2735//Disabled: if R.v is numeric but happens to have cached R.v->string,
2736//then L.v ends up being a string, which is wrong
2737//                      if (R.v == v1+1 && R.v->string) {
2738//                              res = setvar_p(L.v, R.v->string);
2739//                              R.v->string = NULL;
2740//                      } else {
2741                                res = copyvar(L.v, R.v);
2742//                      }
2743                        break;
2744
2745                case XC( OC_TERNARY ):
2746                        if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2747                                syntax_error(EMSG_POSSIBLE_ERROR);
2748                        res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2749                        break;
2750
2751                case XC( OC_FUNC ): {
2752                        var *vbeg, *v;
2753                        const char *sv_progname;
2754
2755                        /* The body might be empty, still has to eval the args */
2756                        if (!op->r.n->info && !op->r.f->body.first)
2757                                syntax_error(EMSG_UNDEF_FUNC);
2758
2759                        vbeg = v = nvalloc(op->r.f->nargs + 1);
2760                        while (op1) {
2761                                var *arg = evaluate(nextarg(&op1), v1);
2762                                copyvar(v, arg);
2763                                v->type |= VF_CHILD;
2764                                v->x.parent = arg;
2765                                if (++v - vbeg >= op->r.f->nargs)
2766                                        break;
2767                        }
2768
2769                        v = fnargs;
2770                        fnargs = vbeg;
2771                        sv_progname = g_progname;
2772
2773                        res = evaluate(op->r.f->body.first, res);
2774
2775                        g_progname = sv_progname;
2776                        nvfree(fnargs);
2777                        fnargs = v;
2778
2779                        break;
2780                }
2781
2782                case XC( OC_GETLINE ):
2783                case XC( OC_PGETLINE ): {
2784                        rstream *rsm;
2785                        int i;
2786
2787                        if (op1) {
2788                                rsm = newfile(L.s);
2789                                if (!rsm->F) {
2790                                        if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2791                                                rsm->F = popen(L.s, "r");
2792                                                rsm->is_pipe = TRUE;
2793                                        } else {
2794                                                rsm->F = fopen_for_read(L.s);  /* not xfopen! */
2795                                        }
2796                                }
2797                        } else {
2798                                if (!iF)
2799                                        iF = next_input_file();
2800                                rsm = iF;
2801                        }
2802
2803                        if (!rsm || !rsm->F) {
2804                                setvar_i(intvar[ERRNO], errno);
2805                                setvar_i(res, -1);
2806                                break;
2807                        }
2808
2809                        if (!op->r.n)
2810                                R.v = intvar[F0];
2811
2812                        i = awk_getline(rsm, R.v);
2813                        if (i > 0 && !op1) {
2814                                incvar(intvar[FNR]);
2815                                incvar(intvar[NR]);
2816                        }
2817                        setvar_i(res, i);
2818                        break;
2819                }
2820
2821                /* simple builtins */
2822                case XC( OC_FBLTIN ): {
2823                        double R_d = R_d; /* for compiler */
2824
2825                        switch (opn) {
2826                        case F_in:
2827                                R_d = (long long)L_d;
2828                                break;
2829
2830                        case F_rn:
2831                                R_d = (double)rand() / (double)RAND_MAX;
2832                                break;
2833
2834                        case F_co:
2835                                if (ENABLE_FEATURE_AWK_LIBM) {
2836                                        R_d = cos(L_d);
2837                                        break;
2838                                }
2839
2840                        case F_ex:
2841                                if (ENABLE_FEATURE_AWK_LIBM) {
2842                                        R_d = exp(L_d);
2843                                        break;
2844                                }
2845
2846                        case F_lg:
2847                                if (ENABLE_FEATURE_AWK_LIBM) {
2848                                        R_d = log(L_d);
2849                                        break;
2850                                }
2851
2852                        case F_si:
2853                                if (ENABLE_FEATURE_AWK_LIBM) {
2854                                        R_d = sin(L_d);
2855                                        break;
2856                                }
2857
2858                        case F_sq:
2859                                if (ENABLE_FEATURE_AWK_LIBM) {
2860                                        R_d = sqrt(L_d);
2861                                        break;
2862                                }
2863
2864                                syntax_error(EMSG_NO_MATH);
2865                                break;
2866
2867                        case F_sr:
2868                                R_d = (double)seed;
2869                                seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2870                                srand(seed);
2871                                break;
2872
2873                        case F_ti:
2874                                R_d = time(NULL);
2875                                break;
2876
2877                        case F_le:
2878                                debug_printf_eval("length: L.s:'%s'\n", L.s);
2879                                if (!op1) {
2880                                        L.s = getvar_s(intvar[F0]);
2881                                        debug_printf_eval("length: L.s='%s'\n", L.s);
2882                                }
2883                                else if (L.v->type & VF_ARRAY) {
2884                                        R_d = L.v->x.array->nel;
2885                                        debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
2886                                        break;
2887                                }
2888                                R_d = strlen(L.s);
2889                                break;
2890
2891                        case F_sy:
2892                                fflush_all();
2893                                R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2894                                                ? (system(L.s) >> 8) : 0;
2895                                break;
2896
2897                        case F_ff:
2898                                if (!op1) {
2899                                        fflush(stdout);
2900                                } else if (L.s && *L.s) {
2901                                        rstream *rsm = newfile(L.s);
2902                                        fflush(rsm->F);
2903                                } else {
2904                                        fflush_all();
2905                                }
2906                                break;
2907
2908                        case F_cl: {
2909                                rstream *rsm;
2910                                int err = 0;
2911                                rsm = (rstream *)hash_search(fdhash, L.s);
2912                                debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2913                                if (rsm) {
2914                                        debug_printf_eval("OC_FBLTIN F_cl "
2915                                                "rsm->is_pipe:%d, ->F:%p\n",
2916                                                rsm->is_pipe, rsm->F);
2917                                        /* Can be NULL if open failed. Example:
2918                                         * getline line <"doesnt_exist";
2919                                         * close("doesnt_exist"); <--- here rsm->F is NULL
2920                                         */
2921                                        if (rsm->F)
2922                                                err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2923                                        free(rsm->buffer);
2924                                        hash_remove(fdhash, L.s);
2925                                }
2926                                if (err)
2927                                        setvar_i(intvar[ERRNO], errno);
2928                                R_d = (double)err;
2929                                break;
2930                        }
2931                        } /* switch */
2932                        setvar_i(res, R_d);
2933                        break;
2934                }
2935
2936                case XC( OC_BUILTIN ):
2937                        res = exec_builtin(op, res);
2938                        break;
2939
2940                case XC( OC_SPRINTF ):
2941                        setvar_p(res, awk_printf(op1));
2942                        break;
2943
2944                case XC( OC_UNARY ): {
2945                        double Ld, R_d;
2946
2947                        Ld = R_d = getvar_i(R.v);
2948                        switch (opn) {
2949                        case 'P':
2950                                Ld = ++R_d;
2951                                goto r_op_change;
2952                        case 'p':
2953                                R_d++;
2954                                goto r_op_change;
2955                        case 'M':
2956                                Ld = --R_d;
2957                                goto r_op_change;
2958                        case 'm':
2959                                R_d--;
2960 r_op_change:
2961                                setvar_i(R.v, R_d);
2962                                break;
2963                        case '!':
2964                                Ld = !istrue(R.v);
2965                                break;
2966                        case '-':
2967                                Ld = -R_d;
2968                                break;
2969                        }
2970                        setvar_i(res, Ld);
2971                        break;
2972                }
2973
2974                case XC( OC_FIELD ): {
2975                        int i = (int)getvar_i(R.v);
2976                        if (i < 0)
2977                                syntax_error(EMSG_NEGATIVE_FIELD);
2978                        if (i == 0) {
2979                                res = intvar[F0];
2980                        } else {
2981                                split_f0();
2982                                if (i > nfields)
2983                                        fsrealloc(i);
2984                                res = &Fields[i - 1];
2985                        }
2986                        break;
2987                }
2988
2989                /* concatenation (" ") and index joining (",") */
2990                case XC( OC_CONCAT ):
2991                case XC( OC_COMMA ): {
2992                        const char *sep = "";
2993                        if ((opinfo & OPCLSMASK) == OC_COMMA)
2994                                sep = getvar_s(intvar[SUBSEP]);
2995                        setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2996                        break;
2997                }
2998
2999                case XC( OC_LAND ):
3000                        setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
3001                        break;
3002
3003                case XC( OC_LOR ):
3004                        setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
3005                        break;
3006
3007                case XC( OC_BINARY ):
3008                case XC( OC_REPLACE ): {
3009                        double R_d = getvar_i(R.v);
3010                        debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
3011                        switch (opn) {
3012                        case '+':
3013                                L_d += R_d;
3014                                break;
3015                        case '-':
3016                                L_d -= R_d;
3017                                break;
3018                        case '*':
3019                                L_d *= R_d;
3020                                break;
3021                        case '/':
3022                                if (R_d == 0)
3023                                        syntax_error(EMSG_DIV_BY_ZERO);
3024                                L_d /= R_d;
3025                                break;
3026                        case '&':
3027                                if (ENABLE_FEATURE_AWK_LIBM)
3028                                        L_d = pow(L_d, R_d);
3029                                else
3030                                        syntax_error(EMSG_NO_MATH);
3031                                break;
3032                        case '%':
3033                                if (R_d == 0)
3034                                        syntax_error(EMSG_DIV_BY_ZERO);
3035                                L_d -= (long long)(L_d / R_d) * R_d;
3036                                break;
3037                        }
3038                        debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
3039                        res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
3040                        break;
3041                }
3042
3043                case XC( OC_COMPARE ): {
3044                        int i = i; /* for compiler */
3045                        double Ld;
3046
3047                        if (is_numeric(L.v) && is_numeric(R.v)) {
3048                                Ld = getvar_i(L.v) - getvar_i(R.v);
3049                        } else {
3050                                const char *l = getvar_s(L.v);
3051                                const char *r = getvar_s(R.v);
3052                                Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
3053                        }
3054                        switch (opn & 0xfe) {
3055                        case 0:
3056                                i = (Ld > 0);
3057                                break;
3058                        case 2:
3059                                i = (Ld >= 0);
3060                                break;
3061                        case 4:
3062                                i = (Ld == 0);
3063                                break;
3064                        }
3065                        setvar_i(res, (i == 0) ^ (opn & 1));
3066                        break;
3067                }
3068
3069                default:
3070                        syntax_error(EMSG_POSSIBLE_ERROR);
3071                } /* switch */
3072 next:
3073                if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3074                        op = op->a.n;
3075                if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3076                        break;
3077                if (nextrec)
3078                        break;
3079        } /* while (op) */
3080
3081        nvfree(v1);
3082        debug_printf_eval("returning from %s(): %p\n", __func__, res);
3083        return res;
3084#undef fnargs
3085#undef seed
3086#undef sreg
3087}
3088
3089
3090/* -------- main & co. -------- */
3091
3092static int awk_exit(int r)
3093{
3094        var tv;
3095        unsigned i;
3096        hash_item *hi;
3097
3098        zero_out_var(&tv);
3099
3100        if (!exiting) {
3101                exiting = TRUE;
3102                nextrec = FALSE;
3103                evaluate(endseq.first, &tv);
3104        }
3105
3106        /* waiting for children */
3107        for (i = 0; i < fdhash->csize; i++) {
3108                hi = fdhash->items[i];
3109                while (hi) {
3110                        if (hi->data.rs.F && hi->data.rs.is_pipe)
3111                                pclose(hi->data.rs.F);
3112                        hi = hi->next;
3113                }
3114        }
3115
3116        exit(r);
3117}
3118
3119/* if expr looks like "var=value", perform assignment and return 1,
3120 * otherwise return 0 */
3121static int is_assignment(const char *expr)
3122{
3123        char *exprc, *val;
3124
3125        if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
3126                return FALSE;
3127        }
3128
3129        exprc = xstrdup(expr);
3130        val = exprc + (val - expr);
3131        *val++ = '\0';
3132
3133        unescape_string_in_place(val);
3134        setvar_u(newvar(exprc), val);
3135        free(exprc);
3136        return TRUE;
3137}
3138
3139/* switch to next input file */
3140static rstream *next_input_file(void)
3141{
3142#define rsm          (G.next_input_file__rsm)
3143#define files_happen (G.next_input_file__files_happen)
3144
3145        FILE *F;
3146        const char *fname, *ind;
3147
3148        if (rsm.F)
3149                fclose(rsm.F);
3150        rsm.F = NULL;
3151        rsm.pos = rsm.adv = 0;
3152
3153        for (;;) {
3154                if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3155                        if (files_happen)
3156                                return NULL;
3157                        fname = "-";
3158                        F = stdin;
3159                        break;
3160                }
3161                ind = getvar_s(incvar(intvar[ARGIND]));
3162                fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3163                if (fname && *fname && !is_assignment(fname)) {
3164                        F = xfopen_stdin(fname);
3165                        break;
3166                }
3167        }
3168
3169        files_happen = TRUE;
3170        setvar_s(intvar[FILENAME], fname);
3171        rsm.F = F;
3172        return &rsm;
3173#undef rsm
3174#undef files_happen
3175}
3176
3177int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3178int awk_main(int argc UNUSED_PARAM, char **argv)
3179{
3180        unsigned opt;
3181        char *opt_F;
3182        llist_t *list_v = NULL;
3183        llist_t *list_f = NULL;
3184#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3185        llist_t *list_e = NULL;
3186#endif
3187        int i, j;
3188        var *v;
3189        var tv;
3190        char **envp;
3191        char *vnames = (char *)vNames; /* cheat */
3192        char *vvalues = (char *)vValues;
3193
3194        INIT_G();
3195
3196        /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3197         * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3198        if (ENABLE_LOCALE_SUPPORT)
3199                setlocale(LC_NUMERIC, "C");
3200
3201        zero_out_var(&tv);
3202
3203        /* allocate global buffer */
3204        g_buf = xmalloc(MAXVARFMT + 1);
3205
3206        vhash = hash_init();
3207        ahash = hash_init();
3208        fdhash = hash_init();
3209        fnhash = hash_init();
3210
3211        /* initialize variables */
3212        for (i = 0; *vnames; i++) {
3213                intvar[i] = v = newvar(nextword(&vnames));
3214                if (*vvalues != '\377')
3215                        setvar_s(v, nextword(&vvalues));
3216                else
3217                        setvar_i(v, 0);
3218
3219                if (*vnames == '*') {
3220                        v->type |= VF_SPECIAL;
3221                        vnames++;
3222                }
3223        }
3224
3225        handle_special(intvar[FS]);
3226        handle_special(intvar[RS]);
3227
3228        newfile("/dev/stdin")->F = stdin;
3229        newfile("/dev/stdout")->F = stdout;
3230        newfile("/dev/stderr")->F = stderr;
3231
3232        /* Huh, people report that sometimes environ is NULL. Oh well. */
3233        if (environ) for (envp = environ; *envp; envp++) {
3234                /* environ is writable, thus we don't strdup it needlessly */
3235                char *s = *envp;
3236                char *s1 = strchr(s, '=');
3237                if (s1) {
3238                        *s1 = '\0';
3239                        /* Both findvar and setvar_u take const char*
3240                         * as 2nd arg -> environment is not trashed */
3241                        setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3242                        *s1 = '=';
3243                }
3244        }
3245        opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3246        argv += optind;
3247        //argc -= optind;
3248        if (opt & OPT_W)
3249                bb_error_msg("warning: option -W is ignored");
3250        if (opt & OPT_F) {
3251                unescape_string_in_place(opt_F);
3252                setvar_s(intvar[FS], opt_F);
3253        }
3254        while (list_v) {
3255                if (!is_assignment(llist_pop(&list_v)))
3256                        bb_show_usage();
3257        }
3258        while (list_f) {
3259                char *s = NULL;
3260                FILE *from_file;
3261
3262                g_progname = llist_pop(&list_f);
3263                from_file = xfopen_stdin(g_progname);
3264                /* one byte is reserved for some trick in next_token */
3265                for (i = j = 1; j > 0; i += j) {
3266                        s = xrealloc(s, i + 4096);
3267                        j = fread(s + i, 1, 4094, from_file);
3268                }
3269                s[i] = '\0';
3270                fclose(from_file);
3271                parse_program(s + 1);
3272                free(s);
3273        }
3274        g_progname = "cmd. line";
3275#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3276        while (list_e) {
3277                parse_program(llist_pop(&list_e));
3278        }
3279#endif
3280        if (!(opt & (OPT_f | OPT_e))) {
3281                if (!*argv)
3282                        bb_show_usage();
3283                parse_program(*argv++);
3284        }
3285
3286        /* fill in ARGV array */
3287        setari_u(intvar[ARGV], 0, "awk");
3288        i = 0;
3289        while (*argv)
3290                setari_u(intvar[ARGV], ++i, *argv++);
3291        setvar_i(intvar[ARGC], i + 1);
3292
3293        evaluate(beginseq.first, &tv);
3294        if (!mainseq.first && !endseq.first)
3295                awk_exit(EXIT_SUCCESS);
3296
3297        /* input file could already be opened in BEGIN block */
3298        if (!iF)
3299                iF = next_input_file();
3300
3301        /* passing through input files */
3302        while (iF) {
3303                nextfile = FALSE;
3304                setvar_i(intvar[FNR], 0);
3305
3306                while ((i = awk_getline(iF, intvar[F0])) > 0) {
3307                        nextrec = FALSE;
3308                        incvar(intvar[NR]);
3309                        incvar(intvar[FNR]);
3310                        evaluate(mainseq.first, &tv);
3311
3312                        if (nextfile)
3313                                break;
3314                }
3315
3316                if (i < 0)
3317                        syntax_error(strerror(errno));
3318
3319                iF = next_input_file();
3320        }
3321
3322        awk_exit(EXIT_SUCCESS);
3323        /*return 0;*/
3324}
3325